blob: cadca1685889228c5d0a38b260689f666d7c07ea [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
#ifdef COUNT_ALLOCS
/* Statistics: how often a cached object was handed out instead of
   allocating a new one. */
int null_strings;	/* hits on the shared empty string */
int one_strings;	/* hits on a shared one-character string */
#endif
11
Guido van Rossum03093a21994-09-28 15:51:32 +000012#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#include <limits.h>
14#else
15#ifndef UCHAR_MAX
16#define UCHAR_MAX 255
17#endif
18#endif
19
Guido van Rossumc0b618a1997-05-02 03:12:38 +000020static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000021#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000022static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000023#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000024
/*
   PyString_FromStringAndSize() and PyString_FromString() try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000040PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000041PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000042{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000043 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000044#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000045 if (size == 0 && (op = nullstring) != NULL) {
46#ifdef COUNT_ALLOCS
47 null_strings++;
48#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000049 Py_INCREF(op);
50 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 if (size == 1 && str != NULL &&
53 (op = characters[*str & UCHAR_MAX]) != NULL)
54 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055#ifdef COUNT_ALLOCS
56 one_strings++;
57#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000058 Py_INCREF(op);
59 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000060 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000061#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000062
63 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000065 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000066 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000068 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef CACHE_HASH
70 op->ob_shash = -1;
71#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000072#ifdef INTERN_STRINGS
73 op->ob_sinterned = NULL;
74#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (str != NULL)
76 memcpy(op->ob_sval, str, size);
77 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000078#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 if (size == 0) {
80 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 } else if (size == 1 && str != NULL) {
83 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000084 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000086#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000087 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088}
89
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000091PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000093 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 if (size > INT_MAX) {
96 PyErr_SetString(PyExc_OverflowError,
97 "string is too long for a Python string");
98 return NULL;
99 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000100#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 if (size == 0 && (op = nullstring) != NULL) {
102#ifdef COUNT_ALLOCS
103 null_strings++;
104#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105 Py_INCREF(op);
106 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 }
108 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
109#ifdef COUNT_ALLOCS
110 one_strings++;
111#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000112 Py_INCREF(op);
113 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000115#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000116
117 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000119 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000120 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000122 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123#ifdef CACHE_HASH
124 op->ob_shash = -1;
125#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000126#ifdef INTERN_STRINGS
127 op->ob_sinterned = NULL;
128#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000130#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
132 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 } else if (size == 1) {
135 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000138#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000139 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140}
141
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000142PyObject *PyString_Decode(const char *s,
143 int size,
144 const char *encoding,
145 const char *errors)
146{
147 PyObject *buffer = NULL, *str;
148
149 if (encoding == NULL)
150 encoding = PyUnicode_GetDefaultEncoding();
151
152 /* Decode via the codec registry */
153 buffer = PyBuffer_FromMemory((void *)s, size);
154 if (buffer == NULL)
155 goto onError;
156 str = PyCodec_Decode(buffer, encoding, errors);
157 if (str == NULL)
158 goto onError;
159 /* Convert Unicode to a string using the default encoding */
160 if (PyUnicode_Check(str)) {
161 PyObject *temp = str;
162 str = PyUnicode_AsEncodedString(str, NULL, NULL);
163 Py_DECREF(temp);
164 if (str == NULL)
165 goto onError;
166 }
167 if (!PyString_Check(str)) {
168 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000169 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000170 str->ob_type->tp_name);
171 Py_DECREF(str);
172 goto onError;
173 }
174 Py_DECREF(buffer);
175 return str;
176
177 onError:
178 Py_XDECREF(buffer);
179 return NULL;
180}
181
182PyObject *PyString_Encode(const char *s,
183 int size,
184 const char *encoding,
185 const char *errors)
186{
187 PyObject *v, *str;
188
189 str = PyString_FromStringAndSize(s, size);
190 if (str == NULL)
191 return NULL;
192 v = PyString_AsEncodedString(str, encoding, errors);
193 Py_DECREF(str);
194 return v;
195}
196
197PyObject *PyString_AsEncodedString(PyObject *str,
198 const char *encoding,
199 const char *errors)
200{
201 PyObject *v;
202
203 if (!PyString_Check(str)) {
204 PyErr_BadArgument();
205 goto onError;
206 }
207
208 if (encoding == NULL)
209 encoding = PyUnicode_GetDefaultEncoding();
210
211 /* Encode via the codec registry */
212 v = PyCodec_Encode(str, encoding, errors);
213 if (v == NULL)
214 goto onError;
215 /* Convert Unicode to a string using the default encoding */
216 if (PyUnicode_Check(v)) {
217 PyObject *temp = v;
218 v = PyUnicode_AsEncodedString(v, NULL, NULL);
219 Py_DECREF(temp);
220 if (v == NULL)
221 goto onError;
222 }
223 if (!PyString_Check(v)) {
224 PyErr_Format(PyExc_TypeError,
225 "encoder did not return a string object (type=%.400s)",
226 v->ob_type->tp_name);
227 Py_DECREF(v);
228 goto onError;
229 }
230 return v;
231
232 onError:
233 return NULL;
234}
235
Guido van Rossum234f9421993-06-17 12:35:49 +0000236static void
Fred Drakeba096332000-07-09 07:04:36 +0000237string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000238{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000239 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000240}
241
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000242static int
243string_getsize(register PyObject *op)
244{
245 char *s;
246 int len;
247 if (PyString_AsStringAndSize(op, &s, &len))
248 return -1;
249 return len;
250}
251
252static /*const*/ char *
253string_getbuffer(register PyObject *op)
254{
255 char *s;
256 int len;
257 if (PyString_AsStringAndSize(op, &s, &len))
258 return NULL;
259 return s;
260}
261
Guido van Rossumd7047b31995-01-02 19:07:15 +0000262int
Fred Drakeba096332000-07-09 07:04:36 +0000263PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000265 if (!PyString_Check(op))
266 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000267 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268}
269
270/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000271PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000273 if (!PyString_Check(op))
274 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000275 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276}
277
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000278/* Internal API needed by PyString_AsStringAndSize(): */
279extern
280PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
281 const char *errors);
282
283int
284PyString_AsStringAndSize(register PyObject *obj,
285 register char **s,
286 register int *len)
287{
288 if (s == NULL) {
289 PyErr_BadInternalCall();
290 return -1;
291 }
292
293 if (!PyString_Check(obj)) {
294 if (PyUnicode_Check(obj)) {
295 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
296 if (obj == NULL)
297 return -1;
298 }
299 else {
300 PyErr_Format(PyExc_TypeError,
301 "expected string or Unicode object, "
302 "%.200s found", obj->ob_type->tp_name);
303 return -1;
304 }
305 }
306
307 *s = PyString_AS_STRING(obj);
308 if (len != NULL)
309 *len = PyString_GET_SIZE(obj);
310 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
311 PyErr_SetString(PyExc_TypeError,
312 "expected string without null bytes");
313 return -1;
314 }
315 return 0;
316}
317
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318/* Methods */
319
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000320static int
Fred Drakeba096332000-07-09 07:04:36 +0000321string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322{
323 int i;
324 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000325 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000326 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000327 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000328 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000329 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000331
Thomas Wouters7e474022000-07-16 12:04:32 +0000332 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 quote = '\'';
334 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
335 quote = '"';
336
337 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 for (i = 0; i < op->ob_size; i++) {
339 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000340 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000341 fprintf(fp, "\\%c", c);
342 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000345 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000346 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000347 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000348 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349}
350
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000351static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000352string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000353{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000354 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
355 PyObject *v;
356 if (newsize > INT_MAX) {
357 PyErr_SetString(PyExc_OverflowError,
358 "string is too large to make repr");
359 }
360 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000361 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000362 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 }
364 else {
365 register int i;
366 register char c;
367 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000368 int quote;
369
Thomas Wouters7e474022000-07-16 12:04:32 +0000370 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000371 quote = '\'';
372 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
373 quote = '"';
374
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000375 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000376 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000377 for (i = 0; i < op->ob_size; i++) {
378 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000379 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380 *p++ = '\\', *p++ = c;
381 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000382 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000383 while (*p != '\0')
384 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000385 }
386 else
387 *p++ = c;
388 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000389 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000390 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000391 _PyString_Resize(
392 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000393 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000394 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000395}
396
397static int
Fred Drakeba096332000-07-09 07:04:36 +0000398string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000399{
400 return a->ob_size;
401}
402
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000403static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000404string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405{
406 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000407 register PyStringObject *op;
408 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000409 if (PyUnicode_Check(bb))
410 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000411 PyErr_Format(PyExc_TypeError,
412 "cannot add type \"%.200s\" to string",
413 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000414 return NULL;
415 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000416#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000417 /* Optimize cases with empty left or right operand */
418 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000419 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000420 return bb;
421 }
422 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000423 Py_INCREF(a);
424 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000425 }
426 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000427 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000429 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000430 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000431 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000432 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000433#ifdef CACHE_HASH
434 op->ob_shash = -1;
435#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000436#ifdef INTERN_STRINGS
437 op->ob_sinterned = NULL;
438#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000439 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
440 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
441 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000443#undef b
444}
445
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000446static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000447string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000448{
449 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000450 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000451 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000452 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000453 if (n < 0)
454 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000455 /* watch out for overflows: the size can overflow int,
456 * and the # of bytes needed can overflow size_t
457 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000459 if (n && size / n != a->ob_size) {
460 PyErr_SetString(PyExc_OverflowError,
461 "repeated string is too long");
462 return NULL;
463 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000464 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000465 Py_INCREF(a);
466 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 }
Tim Peters8f422462000-09-09 06:13:41 +0000468 nbytes = size * sizeof(char);
469 if (nbytes / sizeof(char) != (size_t)size ||
470 nbytes + sizeof(PyStringObject) <= nbytes) {
471 PyErr_SetString(PyExc_OverflowError,
472 "repeated string is too long");
473 return NULL;
474 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000475 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000476 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000477 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000478 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000479 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000480#ifdef CACHE_HASH
481 op->ob_shash = -1;
482#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000483#ifdef INTERN_STRINGS
484 op->ob_sinterned = NULL;
485#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000486 for (i = 0; i < size; i += a->ob_size)
487 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
488 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000489 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000490}
491
492/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
493
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000494static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000495string_slice(register PyStringObject *a, register int i, register int j)
496 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000497{
498 if (i < 0)
499 i = 0;
500 if (j < 0)
501 j = 0; /* Avoid signed/unsigned bug in next line */
502 if (j > a->ob_size)
503 j = a->ob_size;
504 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000505 Py_INCREF(a);
506 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000507 }
508 if (j < i)
509 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000510 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000511}
512
Guido van Rossum9284a572000-03-07 15:53:43 +0000513static int
Fred Drakeba096332000-07-09 07:04:36 +0000514string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000515{
516 register char *s, *end;
517 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000518 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000519 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000520 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000521 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000522 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000523 return -1;
524 }
525 c = PyString_AsString(el)[0];
526 s = PyString_AsString(a);
527 end = s + PyString_Size(a);
528 while (s < end) {
529 if (c == *s++)
530 return 1;
531 }
532 return 0;
533}
534
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000535static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000536string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000537{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000538 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000539 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000541 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000542 return NULL;
543 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000544 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000545 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000546#ifdef COUNT_ALLOCS
547 if (v != NULL)
548 one_strings++;
549#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000550 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000551 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000552 if (v == NULL)
553 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000554 characters[c] = (PyStringObject *) v;
555 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000556 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000557 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000558 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559}
560
561static int
Fred Drakeba096332000-07-09 07:04:36 +0000562string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000563{
Guido van Rossum253919f1991-02-13 23:18:39 +0000564 int len_a = a->ob_size, len_b = b->ob_size;
565 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000566 int cmp;
567 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000568 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000569 if (cmp == 0)
570 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
571 if (cmp != 0)
572 return cmp;
573 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000574 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000575}
576
Guido van Rossum9bfef441993-03-29 10:43:31 +0000577static long
Fred Drakeba096332000-07-09 07:04:36 +0000578string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000579{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000580 register int len;
581 register unsigned char *p;
582 register long x;
583
584#ifdef CACHE_HASH
585 if (a->ob_shash != -1)
586 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000587#ifdef INTERN_STRINGS
588 if (a->ob_sinterned != NULL)
589 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000590 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000591#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000592#endif
593 len = a->ob_size;
594 p = (unsigned char *) a->ob_sval;
595 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000596 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000597 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000598 x ^= a->ob_size;
599 if (x == -1)
600 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000601#ifdef CACHE_HASH
602 a->ob_shash = x;
603#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000604 return x;
605}
606
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000607static int
Fred Drakeba096332000-07-09 07:04:36 +0000608string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000609{
610 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000611 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000612 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000613 return -1;
614 }
615 *ptr = (void *)self->ob_sval;
616 return self->ob_size;
617}
618
619static int
Fred Drakeba096332000-07-09 07:04:36 +0000620string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000621{
Guido van Rossum045e6881997-09-08 18:30:11 +0000622 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000623 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000624 return -1;
625}
626
627static int
Fred Drakeba096332000-07-09 07:04:36 +0000628string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000629{
630 if ( lenp )
631 *lenp = self->ob_size;
632 return 1;
633}
634
Guido van Rossum1db70701998-10-08 02:18:52 +0000635static int
Fred Drakeba096332000-07-09 07:04:36 +0000636string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000637{
638 if ( index != 0 ) {
639 PyErr_SetString(PyExc_SystemError,
640 "accessing non-existent string segment");
641 return -1;
642 }
643 *ptr = self->ob_sval;
644 return self->ob_size;
645}
646
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000647static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000648 (inquiry)string_length, /*sq_length*/
649 (binaryfunc)string_concat, /*sq_concat*/
650 (intargfunc)string_repeat, /*sq_repeat*/
651 (intargfunc)string_item, /*sq_item*/
652 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000653 0, /*sq_ass_item*/
654 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000655 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000656};
657
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000658static PyBufferProcs string_as_buffer = {
659 (getreadbufferproc)string_buffer_getreadbuf,
660 (getwritebufferproc)string_buffer_getwritebuf,
661 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000662 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000663};
664
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665
666
/* Selector constants; presumably consumed by the strip routines
   further down the file -- verify against their use. */
#define LEFTSTRIP	0
#define RIGHTSTRIP	1
#define BOTHSTRIP	2
670
671
672static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000673split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000674{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000675 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000676 PyObject* item;
677 PyObject *list = PyList_New(0);
678
679 if (list == NULL)
680 return NULL;
681
Guido van Rossum4c08d552000-03-10 22:55:18 +0000682 for (i = j = 0; i < len; ) {
683 while (i < len && isspace(Py_CHARMASK(s[i])))
684 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000685 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000686 while (i < len && !isspace(Py_CHARMASK(s[i])))
687 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000689 if (maxsplit-- <= 0)
690 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000691 item = PyString_FromStringAndSize(s+j, (int)(i-j));
692 if (item == NULL)
693 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000694 err = PyList_Append(list, item);
695 Py_DECREF(item);
696 if (err < 0)
697 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000698 while (i < len && isspace(Py_CHARMASK(s[i])))
699 i++;
700 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000701 }
702 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000703 if (j < len) {
704 item = PyString_FromStringAndSize(s+j, (int)(len - j));
705 if (item == NULL)
706 goto finally;
707 err = PyList_Append(list, item);
708 Py_DECREF(item);
709 if (err < 0)
710 goto finally;
711 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000712 return list;
713 finally:
714 Py_DECREF(list);
715 return NULL;
716}
717
718
/* Documentation string for the split() string method. */
static char split__doc__[] =
	"S.split([sep [,maxsplit]]) -> list of strings\n"
	"\n"
	"Return a list of the words in the string S, using sep as the\n"
	"delimiter string. If maxsplit is given, at most maxsplit\n"
	"splits are done. If sep is not specified, any whitespace string\n"
	"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000726
727static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000728string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000729{
730 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000731 int maxsplit = -1;
732 const char *s = PyString_AS_STRING(self), *sub;
733 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000734
Guido van Rossum4c08d552000-03-10 22:55:18 +0000735 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000736 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (maxsplit < 0)
738 maxsplit = INT_MAX;
739 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000740 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000741 if (PyString_Check(subobj)) {
742 sub = PyString_AS_STRING(subobj);
743 n = PyString_GET_SIZE(subobj);
744 }
745 else if (PyUnicode_Check(subobj))
746 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
747 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
748 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000749 if (n == 0) {
750 PyErr_SetString(PyExc_ValueError, "empty separator");
751 return NULL;
752 }
753
754 list = PyList_New(0);
755 if (list == NULL)
756 return NULL;
757
758 i = j = 0;
759 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000760 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000761 if (maxsplit-- <= 0)
762 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000763 item = PyString_FromStringAndSize(s+j, (int)(i-j));
764 if (item == NULL)
765 goto fail;
766 err = PyList_Append(list, item);
767 Py_DECREF(item);
768 if (err < 0)
769 goto fail;
770 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000771 }
772 else
773 i++;
774 }
775 item = PyString_FromStringAndSize(s+j, (int)(len-j));
776 if (item == NULL)
777 goto fail;
778 err = PyList_Append(list, item);
779 Py_DECREF(item);
780 if (err < 0)
781 goto fail;
782
783 return list;
784
785 fail:
786 Py_DECREF(list);
787 return NULL;
788}
789
790
791static char join__doc__[] =
792"S.join(sequence) -> string\n\
793\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000794Return a string which is the concatenation of the strings in the\n\
795sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000796
797static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000798string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000799{
800 char *sep = PyString_AS_STRING(self);
801 int seplen = PyString_GET_SIZE(self);
802 PyObject *res = NULL;
803 int reslen = 0;
804 char *p;
805 int seqlen = 0;
806 int sz = 100;
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000807 int i, slen, sz_incr;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000808 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000809
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000810 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000811 return NULL;
812
Barry Warsaw771d0672000-07-11 04:58:12 +0000813 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000814 if (PyErr_ExceptionMatches(PyExc_TypeError))
815 PyErr_Format(PyExc_TypeError,
816 "sequence expected, %.80s found",
817 orig->ob_type->tp_name);
818 return NULL;
819 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000820 /* From here on out, errors go through finally: for proper
821 * reference count manipulations.
822 */
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000823 seqlen = PySequence_Size(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000824 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000825 item = PySequence_Fast_GET_ITEM(seq, 0);
826 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000827 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000828 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000829 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000830
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000831 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000832 goto finally;
833
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000834 p = PyString_AS_STRING(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000835
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000836 for (i = 0; i < seqlen; i++) {
837 item = PySequence_Fast_GET_ITEM(seq, i);
838 if (!PyString_Check(item)){
839 if (PyUnicode_Check(item)) {
840 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000841 Py_DECREF(seq);
842 return PyUnicode_Join((PyObject *)self, seq);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000843 }
844 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000845 "sequence item %i: expected string,"
846 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000847 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000848 goto finally;
849 }
850 slen = PyString_GET_SIZE(item);
851 while (reslen + slen + seplen >= sz) {
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000852 /* at least double the size of the string */
853 sz_incr = slen + seplen > sz ? slen + seplen : sz;
854 if (_PyString_Resize(&res, sz + sz_incr)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000855 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000856 }
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000857 sz += sz_incr;
858 p = PyString_AS_STRING(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000859 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000860 if (i > 0) {
861 memcpy(p, sep, seplen);
862 p += seplen;
863 reslen += seplen;
864 }
865 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000866 p += slen;
867 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868 }
869 if (_PyString_Resize(&res, reslen))
870 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000871 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000872 return res;
873
874 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000875 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000876 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000877 return NULL;
878}
879
880
881
882static long
Fred Drakeba096332000-07-09 07:04:36 +0000883string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000884{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000885 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000886 int len = PyString_GET_SIZE(self);
887 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000888 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889
Guido van Rossumc6821402000-05-08 14:08:05 +0000890 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
891 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000892 return -2;
893 if (PyString_Check(subobj)) {
894 sub = PyString_AS_STRING(subobj);
895 n = PyString_GET_SIZE(subobj);
896 }
897 else if (PyUnicode_Check(subobj))
898 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
899 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000900 return -2;
901
902 if (last > len)
903 last = len;
904 if (last < 0)
905 last += len;
906 if (last < 0)
907 last = 0;
908 if (i < 0)
909 i += len;
910 if (i < 0)
911 i = 0;
912
Guido van Rossum4c08d552000-03-10 22:55:18 +0000913 if (dir > 0) {
914 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000915 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000916 last -= n;
917 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000918 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000919 return (long)i;
920 }
921 else {
922 int j;
923
924 if (n == 0 && i <= last)
925 return (long)last;
926 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000927 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000928 return (long)j;
929 }
930
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000931 return -1;
932}
933
934
935static char find__doc__[] =
936"S.find(sub [,start [,end]]) -> int\n\
937\n\
938Return the lowest index in S where substring sub is found,\n\
939such that sub is contained within s[start,end]. Optional\n\
940arguments start and end are interpreted as in slice notation.\n\
941\n\
942Return -1 on failure.";
943
944static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000945string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000946{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000947 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000948 if (result == -2)
949 return NULL;
950 return PyInt_FromLong(result);
951}
952
953
954static char index__doc__[] =
955"S.index(sub [,start [,end]]) -> int\n\
956\n\
957Like S.find() but raise ValueError when the substring is not found.";
958
959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000960string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000961{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000962 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000963 if (result == -2)
964 return NULL;
965 if (result == -1) {
966 PyErr_SetString(PyExc_ValueError,
967 "substring not found in string.index");
968 return NULL;
969 }
970 return PyInt_FromLong(result);
971}
972
973
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000974static char rfind__doc__[] =
975"S.rfind(sub [,start [,end]]) -> int\n\
976\n\
977Return the highest index in S where substring sub is found,\n\
978such that sub is contained within s[start,end]. Optional\n\
979arguments start and end are interpreted as in slice notation.\n\
980\n\
981Return -1 on failure.";
982
983static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000984string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000985{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000986 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000987 if (result == -2)
988 return NULL;
989 return PyInt_FromLong(result);
990}
991
992
993static char rindex__doc__[] =
994"S.rindex(sub [,start [,end]]) -> int\n\
995\n\
996Like S.rfind() but raise ValueError when the substring is not found.";
997
998static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000999string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001000{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001001 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001002 if (result == -2)
1003 return NULL;
1004 if (result == -1) {
1005 PyErr_SetString(PyExc_ValueError,
1006 "substring not found in string.rindex");
1007 return NULL;
1008 }
1009 return PyInt_FromLong(result);
1010}
1011
1012
1013static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001014do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001015{
1016 char *s = PyString_AS_STRING(self);
1017 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001018
Guido van Rossum43713e52000-02-29 13:59:29 +00001019 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001020 return NULL;
1021
1022 i = 0;
1023 if (striptype != RIGHTSTRIP) {
1024 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1025 i++;
1026 }
1027 }
1028
1029 j = len;
1030 if (striptype != LEFTSTRIP) {
1031 do {
1032 j--;
1033 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1034 j++;
1035 }
1036
1037 if (i == 0 && j == len) {
1038 Py_INCREF(self);
1039 return (PyObject*)self;
1040 }
1041 else
1042 return PyString_FromStringAndSize(s+i, j-i);
1043}
1044
1045
1046static char strip__doc__[] =
1047"S.strip() -> string\n\
1048\n\
1049Return a copy of the string S with leading and trailing\n\
1050whitespace removed.";
1051
1052static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001053string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054{
1055 return do_strip(self, args, BOTHSTRIP);
1056}
1057
1058
1059static char lstrip__doc__[] =
1060"S.lstrip() -> string\n\
1061\n\
1062Return a copy of the string S with leading whitespace removed.";
1063
1064static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001065string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001066{
1067 return do_strip(self, args, LEFTSTRIP);
1068}
1069
1070
1071static char rstrip__doc__[] =
1072"S.rstrip() -> string\n\
1073\n\
1074Return a copy of the string S with trailing whitespace removed.";
1075
1076static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001077string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001078{
1079 return do_strip(self, args, RIGHTSTRIP);
1080}
1081
1082
1083static char lower__doc__[] =
1084"S.lower() -> string\n\
1085\n\
1086Return a copy of the string S converted to lowercase.";
1087
1088static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001089string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001090{
1091 char *s = PyString_AS_STRING(self), *s_new;
1092 int i, n = PyString_GET_SIZE(self);
1093 PyObject *new;
1094
Guido van Rossum43713e52000-02-29 13:59:29 +00001095 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001096 return NULL;
1097 new = PyString_FromStringAndSize(NULL, n);
1098 if (new == NULL)
1099 return NULL;
1100 s_new = PyString_AsString(new);
1101 for (i = 0; i < n; i++) {
1102 int c = Py_CHARMASK(*s++);
1103 if (isupper(c)) {
1104 *s_new = tolower(c);
1105 } else
1106 *s_new = c;
1107 s_new++;
1108 }
1109 return new;
1110}
1111
1112
1113static char upper__doc__[] =
1114"S.upper() -> string\n\
1115\n\
1116Return a copy of the string S converted to uppercase.";
1117
1118static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001119string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001120{
1121 char *s = PyString_AS_STRING(self), *s_new;
1122 int i, n = PyString_GET_SIZE(self);
1123 PyObject *new;
1124
Guido van Rossum43713e52000-02-29 13:59:29 +00001125 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126 return NULL;
1127 new = PyString_FromStringAndSize(NULL, n);
1128 if (new == NULL)
1129 return NULL;
1130 s_new = PyString_AsString(new);
1131 for (i = 0; i < n; i++) {
1132 int c = Py_CHARMASK(*s++);
1133 if (islower(c)) {
1134 *s_new = toupper(c);
1135 } else
1136 *s_new = c;
1137 s_new++;
1138 }
1139 return new;
1140}
1141
1142
Guido van Rossum4c08d552000-03-10 22:55:18 +00001143static char title__doc__[] =
1144"S.title() -> string\n\
1145\n\
1146Return a titlecased version of S, i.e. words start with uppercase\n\
1147characters, all remaining cased characters have lowercase.";
1148
1149static PyObject*
1150string_title(PyUnicodeObject *self, PyObject *args)
1151{
1152 char *s = PyString_AS_STRING(self), *s_new;
1153 int i, n = PyString_GET_SIZE(self);
1154 int previous_is_cased = 0;
1155 PyObject *new;
1156
1157 if (!PyArg_ParseTuple(args, ":title"))
1158 return NULL;
1159 new = PyString_FromStringAndSize(NULL, n);
1160 if (new == NULL)
1161 return NULL;
1162 s_new = PyString_AsString(new);
1163 for (i = 0; i < n; i++) {
1164 int c = Py_CHARMASK(*s++);
1165 if (islower(c)) {
1166 if (!previous_is_cased)
1167 c = toupper(c);
1168 previous_is_cased = 1;
1169 } else if (isupper(c)) {
1170 if (previous_is_cased)
1171 c = tolower(c);
1172 previous_is_cased = 1;
1173 } else
1174 previous_is_cased = 0;
1175 *s_new++ = c;
1176 }
1177 return new;
1178}
1179
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180static char capitalize__doc__[] =
1181"S.capitalize() -> string\n\
1182\n\
1183Return a copy of the string S with only its first character\n\
1184capitalized.";
1185
1186static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001187string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001188{
1189 char *s = PyString_AS_STRING(self), *s_new;
1190 int i, n = PyString_GET_SIZE(self);
1191 PyObject *new;
1192
Guido van Rossum43713e52000-02-29 13:59:29 +00001193 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001194 return NULL;
1195 new = PyString_FromStringAndSize(NULL, n);
1196 if (new == NULL)
1197 return NULL;
1198 s_new = PyString_AsString(new);
1199 if (0 < n) {
1200 int c = Py_CHARMASK(*s++);
1201 if (islower(c))
1202 *s_new = toupper(c);
1203 else
1204 *s_new = c;
1205 s_new++;
1206 }
1207 for (i = 1; i < n; i++) {
1208 int c = Py_CHARMASK(*s++);
1209 if (isupper(c))
1210 *s_new = tolower(c);
1211 else
1212 *s_new = c;
1213 s_new++;
1214 }
1215 return new;
1216}
1217
1218
1219static char count__doc__[] =
1220"S.count(sub[, start[, end]]) -> int\n\
1221\n\
1222Return the number of occurrences of substring sub in string\n\
1223S[start:end]. Optional arguments start and end are\n\
1224interpreted as in slice notation.";
1225
1226static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001227string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001228{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001229 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230 int len = PyString_GET_SIZE(self), n;
1231 int i = 0, last = INT_MAX;
1232 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001233 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001234
Guido van Rossumc6821402000-05-08 14:08:05 +00001235 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1236 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001237 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001238
Guido van Rossum4c08d552000-03-10 22:55:18 +00001239 if (PyString_Check(subobj)) {
1240 sub = PyString_AS_STRING(subobj);
1241 n = PyString_GET_SIZE(subobj);
1242 }
1243 else if (PyUnicode_Check(subobj))
1244 return PyInt_FromLong(
1245 PyUnicode_Count((PyObject *)self, subobj, i, last));
1246 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1247 return NULL;
1248
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001249 if (last > len)
1250 last = len;
1251 if (last < 0)
1252 last += len;
1253 if (last < 0)
1254 last = 0;
1255 if (i < 0)
1256 i += len;
1257 if (i < 0)
1258 i = 0;
1259 m = last + 1 - n;
1260 if (n == 0)
1261 return PyInt_FromLong((long) (m-i));
1262
1263 r = 0;
1264 while (i < m) {
1265 if (!memcmp(s+i, sub, n)) {
1266 r++;
1267 i += n;
1268 } else {
1269 i++;
1270 }
1271 }
1272 return PyInt_FromLong((long) r);
1273}
1274
1275
1276static char swapcase__doc__[] =
1277"S.swapcase() -> string\n\
1278\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001279Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280converted to lowercase and vice versa.";
1281
1282static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001283string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001284{
1285 char *s = PyString_AS_STRING(self), *s_new;
1286 int i, n = PyString_GET_SIZE(self);
1287 PyObject *new;
1288
Guido van Rossum43713e52000-02-29 13:59:29 +00001289 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 return NULL;
1291 new = PyString_FromStringAndSize(NULL, n);
1292 if (new == NULL)
1293 return NULL;
1294 s_new = PyString_AsString(new);
1295 for (i = 0; i < n; i++) {
1296 int c = Py_CHARMASK(*s++);
1297 if (islower(c)) {
1298 *s_new = toupper(c);
1299 }
1300 else if (isupper(c)) {
1301 *s_new = tolower(c);
1302 }
1303 else
1304 *s_new = c;
1305 s_new++;
1306 }
1307 return new;
1308}
1309
1310
1311static char translate__doc__[] =
1312"S.translate(table [,deletechars]) -> string\n\
1313\n\
1314Return a copy of the string S, where all characters occurring\n\
1315in the optional argument deletechars are removed, and the\n\
1316remaining characters have been mapped through the given\n\
1317translation table, which must be a string of length 256.";
1318
1319static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001320string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 register char *input, *output;
1323 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324 register int i, c, changed = 0;
1325 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001326 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327 int inlen, tablen, dellen = 0;
1328 PyObject *result;
1329 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001331
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 if (!PyArg_ParseTuple(args, "O|O:translate",
1333 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335
1336 if (PyString_Check(tableobj)) {
1337 table1 = PyString_AS_STRING(tableobj);
1338 tablen = PyString_GET_SIZE(tableobj);
1339 }
1340 else if (PyUnicode_Check(tableobj)) {
1341 /* Unicode .translate() does not support the deletechars
1342 parameter; instead a mapping to None will cause characters
1343 to be deleted. */
1344 if (delobj != NULL) {
1345 PyErr_SetString(PyExc_TypeError,
1346 "deletions are implemented differently for unicode");
1347 return NULL;
1348 }
1349 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1350 }
1351 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353
1354 if (delobj != NULL) {
1355 if (PyString_Check(delobj)) {
1356 del_table = PyString_AS_STRING(delobj);
1357 dellen = PyString_GET_SIZE(delobj);
1358 }
1359 else if (PyUnicode_Check(delobj)) {
1360 PyErr_SetString(PyExc_TypeError,
1361 "deletions are implemented differently for unicode");
1362 return NULL;
1363 }
1364 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1365 return NULL;
1366
1367 if (tablen != 256) {
1368 PyErr_SetString(PyExc_ValueError,
1369 "translation table must be 256 characters long");
1370 return NULL;
1371 }
1372 }
1373 else {
1374 del_table = NULL;
1375 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376 }
1377
1378 table = table1;
1379 inlen = PyString_Size(input_obj);
1380 result = PyString_FromStringAndSize((char *)NULL, inlen);
1381 if (result == NULL)
1382 return NULL;
1383 output_start = output = PyString_AsString(result);
1384 input = PyString_AsString(input_obj);
1385
1386 if (dellen == 0) {
1387 /* If no deletions are required, use faster code */
1388 for (i = inlen; --i >= 0; ) {
1389 c = Py_CHARMASK(*input++);
1390 if (Py_CHARMASK((*output++ = table[c])) != c)
1391 changed = 1;
1392 }
1393 if (changed)
1394 return result;
1395 Py_DECREF(result);
1396 Py_INCREF(input_obj);
1397 return input_obj;
1398 }
1399
1400 for (i = 0; i < 256; i++)
1401 trans_table[i] = Py_CHARMASK(table[i]);
1402
1403 for (i = 0; i < dellen; i++)
1404 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1405
1406 for (i = inlen; --i >= 0; ) {
1407 c = Py_CHARMASK(*input++);
1408 if (trans_table[c] != -1)
1409 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1410 continue;
1411 changed = 1;
1412 }
1413 if (!changed) {
1414 Py_DECREF(result);
1415 Py_INCREF(input_obj);
1416 return input_obj;
1417 }
1418 /* Fix the size of the resulting string */
1419 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1420 return NULL;
1421 return result;
1422}
1423
1424
1425/* What follows is used for implementing replace(). Perry Stoll. */
1426
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  when there is none.  A pattern longer than MEM never matches, so the
  result is -1 in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* a match cannot start in the last pat_len-1 bytes */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1453/*
1454 mymemcnt
1455
1456 Return the number of distinct times PAT is found in MEM.
1457 meaning mem=1111 and pat==11 returns 2.
1458 mem=11111 and pat==11 also return 2.
1459 */
1460static int
Tim Petersc2e7da92000-07-09 08:02:21 +00001461mymemcnt(const char *mem, int len, const char *pat, int pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462{
1463 register int offset = 0;
1464 int nfound = 0;
1465
1466 while (len >= 0) {
1467 offset = mymemfind(mem, len, pat, pat_len);
1468 if (offset == -1)
1469 break;
1470 mem += offset + pat_len;
1471 len -= offset + pat_len;
1472 nfound++;
1473 }
1474 return nfound;
1475}
1476
1477/*
1478 mymemreplace
1479
Thomas Wouters7e474022000-07-16 12:04:32 +00001480 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481 replaced with SUB.
1482
Thomas Wouters7e474022000-07-16 12:04:32 +00001483 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 of PAT in STR, then the original string is returned. Otherwise, a new
1485 string is allocated here and returned.
1486
1487 on return, out_len is:
1488 the length of output string, or
1489 -1 if the input string is returned, or
1490 unchanged if an error occurs (no memory).
1491
1492 return value is:
1493 the new string allocated locally, or
1494 NULL if an error occurred.
1495*/
1496static char *
Tim Petersc2e7da92000-07-09 08:02:21 +00001497mymemreplace(const char *str, int len, /* input string */
1498 const char *pat, int pat_len, /* pattern string to find */
1499 const char *sub, int sub_len, /* substitution string */
1500 int count, /* number of replacements */
Fred Drakeba096332000-07-09 07:04:36 +00001501 int *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001502{
1503 char *out_s;
1504 char *new_s;
1505 int nfound, offset, new_len;
1506
1507 if (len == 0 || pat_len > len)
1508 goto return_same;
1509
1510 /* find length of output string */
1511 nfound = mymemcnt(str, len, pat, pat_len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001512 if (count < 0)
1513 count = INT_MAX;
1514 else if (nfound > count)
1515 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 if (nfound == 0)
1517 goto return_same;
1518 new_len = len + nfound*(sub_len - pat_len);
1519
Guido van Rossumb18618d2000-05-03 23:44:39 +00001520 new_s = (char *)PyMem_MALLOC(new_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001521 if (new_s == NULL) return NULL;
1522
1523 *out_len = new_len;
1524 out_s = new_s;
1525
1526 while (len > 0) {
1527 /* find index of next instance of pattern */
1528 offset = mymemfind(str, len, pat, pat_len);
1529 /* if not found, break out of loop */
1530 if (offset == -1) break;
1531
1532 /* copy non matching part of input string */
1533 memcpy(new_s, str, offset); /* copy part of str before pat */
1534 str += offset + pat_len; /* move str past pattern */
1535 len -= offset + pat_len; /* reduce length of str remaining */
1536
1537 /* copy substitute into the output string */
1538 new_s += offset; /* move new_s to dest for sub string */
1539 memcpy(new_s, sub, sub_len); /* copy substring into new_s */
1540 new_s += sub_len; /* offset new_s past sub string */
1541
1542 /* break when we've done count replacements */
1543 if (--count == 0) break;
1544 }
1545 /* copy any remaining values into output string */
1546 if (len > 0)
1547 memcpy(new_s, str, len);
1548 return out_s;
1549
1550 return_same:
1551 *out_len = -1;
Tim Petersc2e7da92000-07-09 08:02:21 +00001552 return (char*)str; /* have to cast away constness here */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553}
1554
1555
1556static char replace__doc__[] =
1557"S.replace (old, new[, maxsplit]) -> string\n\
1558\n\
1559Return a copy of string S with all occurrences of substring\n\
1560old replaced by new. If the optional argument maxsplit is\n\
1561given, only the first maxsplit occurrences are replaced.";
1562
1563static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001564string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001566 const char *str = PyString_AS_STRING(self), *sub, *repl;
1567 char *new_s;
1568 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1569 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572
Guido van Rossum4c08d552000-03-10 22:55:18 +00001573 if (!PyArg_ParseTuple(args, "OO|i:replace",
1574 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001576
1577 if (PyString_Check(subobj)) {
1578 sub = PyString_AS_STRING(subobj);
1579 sub_len = PyString_GET_SIZE(subobj);
1580 }
1581 else if (PyUnicode_Check(subobj))
1582 return PyUnicode_Replace((PyObject *)self,
1583 subobj, replobj, count);
1584 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1585 return NULL;
1586
1587 if (PyString_Check(replobj)) {
1588 repl = PyString_AS_STRING(replobj);
1589 repl_len = PyString_GET_SIZE(replobj);
1590 }
1591 else if (PyUnicode_Check(replobj))
1592 return PyUnicode_Replace((PyObject *)self,
1593 subobj, replobj, count);
1594 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1595 return NULL;
1596
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001597 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001598 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 return NULL;
1600 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001601 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 if (new_s == NULL) {
1603 PyErr_NoMemory();
1604 return NULL;
1605 }
1606 if (out_len == -1) {
1607 /* we're returning another reference to self */
1608 new = (PyObject*)self;
1609 Py_INCREF(new);
1610 }
1611 else {
1612 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001613 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 }
1615 return new;
1616}
1617
1618
1619static char startswith__doc__[] =
1620"S.startswith(prefix[, start[, end]]) -> int\n\
1621\n\
1622Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1623optional start, test S beginning at that position. With optional end, stop\n\
1624comparing S at that position.";
1625
1626static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001627string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001629 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 int plen;
1633 int start = 0;
1634 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001635 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636
Guido van Rossumc6821402000-05-08 14:08:05 +00001637 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1638 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001639 return NULL;
1640 if (PyString_Check(subobj)) {
1641 prefix = PyString_AS_STRING(subobj);
1642 plen = PyString_GET_SIZE(subobj);
1643 }
1644 else if (PyUnicode_Check(subobj))
1645 return PyInt_FromLong(
1646 PyUnicode_Tailmatch((PyObject *)self,
1647 subobj, start, end, -1));
1648 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649 return NULL;
1650
1651 /* adopt Java semantics for index out of range. it is legal for
1652 * offset to be == plen, but this only returns true if prefix is
1653 * the empty string.
1654 */
1655 if (start < 0 || start+plen > len)
1656 return PyInt_FromLong(0);
1657
1658 if (!memcmp(str+start, prefix, plen)) {
1659 /* did the match end after the specified end? */
1660 if (end < 0)
1661 return PyInt_FromLong(1);
1662 else if (end - start < plen)
1663 return PyInt_FromLong(0);
1664 else
1665 return PyInt_FromLong(1);
1666 }
1667 else return PyInt_FromLong(0);
1668}
1669
1670
1671static char endswith__doc__[] =
1672"S.endswith(suffix[, start[, end]]) -> int\n\
1673\n\
1674Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1675optional start, test S beginning at that position. With optional end, stop\n\
1676comparing S at that position.";
1677
1678static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001679string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001680{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 const char* suffix;
1684 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001685 int start = 0;
1686 int end = -1;
1687 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001688 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689
Guido van Rossumc6821402000-05-08 14:08:05 +00001690 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1691 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001692 return NULL;
1693 if (PyString_Check(subobj)) {
1694 suffix = PyString_AS_STRING(subobj);
1695 slen = PyString_GET_SIZE(subobj);
1696 }
1697 else if (PyUnicode_Check(subobj))
1698 return PyInt_FromLong(
1699 PyUnicode_Tailmatch((PyObject *)self,
1700 subobj, start, end, +1));
1701 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702 return NULL;
1703
Guido van Rossum4c08d552000-03-10 22:55:18 +00001704 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705 return PyInt_FromLong(0);
1706
1707 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001708 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709
Guido van Rossum4c08d552000-03-10 22:55:18 +00001710 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711 return PyInt_FromLong(1);
1712 else return PyInt_FromLong(0);
1713}
1714
1715
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001716static char encode__doc__[] =
1717"S.encode([encoding[,errors]]) -> string\n\
1718\n\
1719Return an encoded string version of S. Default encoding is the current\n\
1720default string encoding. errors may be given to set a different error\n\
1721handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1722a ValueError. Other possible values are 'ignore' and 'replace'.";
1723
1724static PyObject *
1725string_encode(PyStringObject *self, PyObject *args)
1726{
1727 char *encoding = NULL;
1728 char *errors = NULL;
1729 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1730 return NULL;
1731 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1732}
1733
1734
Guido van Rossum4c08d552000-03-10 22:55:18 +00001735static char expandtabs__doc__[] =
1736"S.expandtabs([tabsize]) -> string\n\
1737\n\
1738Return a copy of S where all tab characters are expanded using spaces.\n\
1739If tabsize is not given, a tab size of 8 characters is assumed.";
1740
1741static PyObject*
1742string_expandtabs(PyStringObject *self, PyObject *args)
1743{
1744 const char *e, *p;
1745 char *q;
1746 int i, j;
1747 PyObject *u;
1748 int tabsize = 8;
1749
1750 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1751 return NULL;
1752
Thomas Wouters7e474022000-07-16 12:04:32 +00001753 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001754 i = j = 0;
1755 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1756 for (p = PyString_AS_STRING(self); p < e; p++)
1757 if (*p == '\t') {
1758 if (tabsize > 0)
1759 j += tabsize - (j % tabsize);
1760 }
1761 else {
1762 j++;
1763 if (*p == '\n' || *p == '\r') {
1764 i += j;
1765 j = 0;
1766 }
1767 }
1768
1769 /* Second pass: create output string and fill it */
1770 u = PyString_FromStringAndSize(NULL, i + j);
1771 if (!u)
1772 return NULL;
1773
1774 j = 0;
1775 q = PyString_AS_STRING(u);
1776
1777 for (p = PyString_AS_STRING(self); p < e; p++)
1778 if (*p == '\t') {
1779 if (tabsize > 0) {
1780 i = tabsize - (j % tabsize);
1781 j += i;
1782 while (i--)
1783 *q++ = ' ';
1784 }
1785 }
1786 else {
1787 j++;
1788 *q++ = *p;
1789 if (*p == '\n' || *p == '\r')
1790 j = 0;
1791 }
1792
1793 return u;
1794}
1795
1796static
1797PyObject *pad(PyStringObject *self,
1798 int left,
1799 int right,
1800 char fill)
1801{
1802 PyObject *u;
1803
1804 if (left < 0)
1805 left = 0;
1806 if (right < 0)
1807 right = 0;
1808
1809 if (left == 0 && right == 0) {
1810 Py_INCREF(self);
1811 return (PyObject *)self;
1812 }
1813
1814 u = PyString_FromStringAndSize(NULL,
1815 left + PyString_GET_SIZE(self) + right);
1816 if (u) {
1817 if (left)
1818 memset(PyString_AS_STRING(u), fill, left);
1819 memcpy(PyString_AS_STRING(u) + left,
1820 PyString_AS_STRING(self),
1821 PyString_GET_SIZE(self));
1822 if (right)
1823 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1824 fill, right);
1825 }
1826
1827 return u;
1828}
1829
1830static char ljust__doc__[] =
1831"S.ljust(width) -> string\n\
1832\n\
1833Return S left justified in a string of length width. Padding is\n\
1834done using spaces.";
1835
1836static PyObject *
1837string_ljust(PyStringObject *self, PyObject *args)
1838{
1839 int width;
1840 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1841 return NULL;
1842
1843 if (PyString_GET_SIZE(self) >= width) {
1844 Py_INCREF(self);
1845 return (PyObject*) self;
1846 }
1847
1848 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1849}
1850
1851
1852static char rjust__doc__[] =
1853"S.rjust(width) -> string\n\
1854\n\
1855Return S right justified in a string of length width. Padding is\n\
1856done using spaces.";
1857
1858static PyObject *
1859string_rjust(PyStringObject *self, PyObject *args)
1860{
1861 int width;
1862 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1863 return NULL;
1864
1865 if (PyString_GET_SIZE(self) >= width) {
1866 Py_INCREF(self);
1867 return (PyObject*) self;
1868 }
1869
1870 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1871}
1872
1873
1874static char center__doc__[] =
1875"S.center(width) -> string\n\
1876\n\
1877Return S centered in a string of length width. Padding is done\n\
1878using spaces.";
1879
1880static PyObject *
1881string_center(PyStringObject *self, PyObject *args)
1882{
1883 int marg, left;
1884 int width;
1885
1886 if (!PyArg_ParseTuple(args, "i:center", &width))
1887 return NULL;
1888
1889 if (PyString_GET_SIZE(self) >= width) {
1890 Py_INCREF(self);
1891 return (PyObject*) self;
1892 }
1893
1894 marg = width - PyString_GET_SIZE(self);
1895 left = marg / 2 + (marg & width & 1);
1896
1897 return pad(self, left, marg - left, ' ');
1898}
1899
#if 0
/* Disabled: this whole section is compiled out. */
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width): left-pad with '0' characters and,
   if the original text began with a sign, move that sign in front of
   the zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int nzeros;
	char *buf;
	PyObject *padded;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	nzeros = width - PyString_GET_SIZE(self);

	padded = pad(self, nzeros, 0, '0');
	if (padded == NULL)
		return NULL;

	buf = PyString_AS_STRING(padded);
	/* buf[nzeros] is the first character of the original text */
	if (buf[nzeros] == '+' || buf[nzeros] == '-') {
		/* move sign to beginning of string */
		buf[0] = buf[nzeros];
		buf[nzeros] = '0';
	}

	return padded;
}
#endif
1939
1940static char isspace__doc__[] =
1941"S.isspace() -> int\n\
1942\n\
1943Return 1 if there are only whitespace characters in S,\n\
19440 otherwise.";
1945
1946static PyObject*
1947string_isspace(PyStringObject *self, PyObject *args)
1948{
Fred Drakeba096332000-07-09 07:04:36 +00001949 register const unsigned char *p
1950 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001951 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001952
1953 if (!PyArg_NoArgs(args))
1954 return NULL;
1955
1956 /* Shortcut for single character strings */
1957 if (PyString_GET_SIZE(self) == 1 &&
1958 isspace(*p))
1959 return PyInt_FromLong(1);
1960
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001961 /* Special case for empty strings */
1962 if (PyString_GET_SIZE(self) == 0)
1963 return PyInt_FromLong(0);
1964
Guido van Rossum4c08d552000-03-10 22:55:18 +00001965 e = p + PyString_GET_SIZE(self);
1966 for (; p < e; p++) {
1967 if (!isspace(*p))
1968 return PyInt_FromLong(0);
1969 }
1970 return PyInt_FromLong(1);
1971}
1972
1973
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001974static char isalpha__doc__[] =
1975"S.isalpha() -> int\n\
1976\n\
1977Return 1 if all characters in S are alphabetic\n\
1978and there is at least one character in S, 0 otherwise.";
1979
1980static PyObject*
1981string_isalpha(PyUnicodeObject *self, PyObject *args)
1982{
Fred Drakeba096332000-07-09 07:04:36 +00001983 register const unsigned char *p
1984 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001985 register const unsigned char *e;
1986
1987 if (!PyArg_NoArgs(args))
1988 return NULL;
1989
1990 /* Shortcut for single character strings */
1991 if (PyString_GET_SIZE(self) == 1 &&
1992 isalpha(*p))
1993 return PyInt_FromLong(1);
1994
1995 /* Special case for empty strings */
1996 if (PyString_GET_SIZE(self) == 0)
1997 return PyInt_FromLong(0);
1998
1999 e = p + PyString_GET_SIZE(self);
2000 for (; p < e; p++) {
2001 if (!isalpha(*p))
2002 return PyInt_FromLong(0);
2003 }
2004 return PyInt_FromLong(1);
2005}
2006
2007
2008static char isalnum__doc__[] =
2009"S.isalnum() -> int\n\
2010\n\
2011Return 1 if all characters in S are alphanumeric\n\
2012and there is at least one character in S, 0 otherwise.";
2013
2014static PyObject*
2015string_isalnum(PyUnicodeObject *self, PyObject *args)
2016{
Fred Drakeba096332000-07-09 07:04:36 +00002017 register const unsigned char *p
2018 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002019 register const unsigned char *e;
2020
2021 if (!PyArg_NoArgs(args))
2022 return NULL;
2023
2024 /* Shortcut for single character strings */
2025 if (PyString_GET_SIZE(self) == 1 &&
2026 isalnum(*p))
2027 return PyInt_FromLong(1);
2028
2029 /* Special case for empty strings */
2030 if (PyString_GET_SIZE(self) == 0)
2031 return PyInt_FromLong(0);
2032
2033 e = p + PyString_GET_SIZE(self);
2034 for (; p < e; p++) {
2035 if (!isalnum(*p))
2036 return PyInt_FromLong(0);
2037 }
2038 return PyInt_FromLong(1);
2039}
2040
2041
Guido van Rossum4c08d552000-03-10 22:55:18 +00002042static char isdigit__doc__[] =
2043"S.isdigit() -> int\n\
2044\n\
2045Return 1 if there are only digit characters in S,\n\
20460 otherwise.";
2047
2048static PyObject*
2049string_isdigit(PyStringObject *self, PyObject *args)
2050{
Fred Drakeba096332000-07-09 07:04:36 +00002051 register const unsigned char *p
2052 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002053 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002054
2055 if (!PyArg_NoArgs(args))
2056 return NULL;
2057
2058 /* Shortcut for single character strings */
2059 if (PyString_GET_SIZE(self) == 1 &&
2060 isdigit(*p))
2061 return PyInt_FromLong(1);
2062
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002063 /* Special case for empty strings */
2064 if (PyString_GET_SIZE(self) == 0)
2065 return PyInt_FromLong(0);
2066
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067 e = p + PyString_GET_SIZE(self);
2068 for (; p < e; p++) {
2069 if (!isdigit(*p))
2070 return PyInt_FromLong(0);
2071 }
2072 return PyInt_FromLong(1);
2073}
2074
2075
2076static char islower__doc__[] =
2077"S.islower() -> int\n\
2078\n\
2079Return 1 if all cased characters in S are lowercase and there is\n\
2080at least one cased character in S, 0 otherwise.";
2081
2082static PyObject*
2083string_islower(PyStringObject *self, PyObject *args)
2084{
Fred Drakeba096332000-07-09 07:04:36 +00002085 register const unsigned char *p
2086 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002087 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088 int cased;
2089
2090 if (!PyArg_NoArgs(args))
2091 return NULL;
2092
2093 /* Shortcut for single character strings */
2094 if (PyString_GET_SIZE(self) == 1)
2095 return PyInt_FromLong(islower(*p) != 0);
2096
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002097 /* Special case for empty strings */
2098 if (PyString_GET_SIZE(self) == 0)
2099 return PyInt_FromLong(0);
2100
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101 e = p + PyString_GET_SIZE(self);
2102 cased = 0;
2103 for (; p < e; p++) {
2104 if (isupper(*p))
2105 return PyInt_FromLong(0);
2106 else if (!cased && islower(*p))
2107 cased = 1;
2108 }
2109 return PyInt_FromLong(cased);
2110}
2111
2112
2113static char isupper__doc__[] =
2114"S.isupper() -> int\n\
2115\n\
2116Return 1 if all cased characters in S are uppercase and there is\n\
2117at least one cased character in S, 0 otherwise.";
2118
2119static PyObject*
2120string_isupper(PyStringObject *self, PyObject *args)
2121{
Fred Drakeba096332000-07-09 07:04:36 +00002122 register const unsigned char *p
2123 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002124 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002125 int cased;
2126
2127 if (!PyArg_NoArgs(args))
2128 return NULL;
2129
2130 /* Shortcut for single character strings */
2131 if (PyString_GET_SIZE(self) == 1)
2132 return PyInt_FromLong(isupper(*p) != 0);
2133
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002134 /* Special case for empty strings */
2135 if (PyString_GET_SIZE(self) == 0)
2136 return PyInt_FromLong(0);
2137
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 e = p + PyString_GET_SIZE(self);
2139 cased = 0;
2140 for (; p < e; p++) {
2141 if (islower(*p))
2142 return PyInt_FromLong(0);
2143 else if (!cased && isupper(*p))
2144 cased = 1;
2145 }
2146 return PyInt_FromLong(cased);
2147}
2148
2149
2150static char istitle__doc__[] =
2151"S.istitle() -> int\n\
2152\n\
2153Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2154may only follow uncased characters and lowercase characters only cased\n\
2155ones. Return 0 otherwise.";
2156
2157static PyObject*
2158string_istitle(PyStringObject *self, PyObject *args)
2159{
Fred Drakeba096332000-07-09 07:04:36 +00002160 register const unsigned char *p
2161 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002162 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002163 int cased, previous_is_cased;
2164
2165 if (!PyArg_NoArgs(args))
2166 return NULL;
2167
2168 /* Shortcut for single character strings */
2169 if (PyString_GET_SIZE(self) == 1)
2170 return PyInt_FromLong(isupper(*p) != 0);
2171
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002172 /* Special case for empty strings */
2173 if (PyString_GET_SIZE(self) == 0)
2174 return PyInt_FromLong(0);
2175
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 e = p + PyString_GET_SIZE(self);
2177 cased = 0;
2178 previous_is_cased = 0;
2179 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002180 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002181
2182 if (isupper(ch)) {
2183 if (previous_is_cased)
2184 return PyInt_FromLong(0);
2185 previous_is_cased = 1;
2186 cased = 1;
2187 }
2188 else if (islower(ch)) {
2189 if (!previous_is_cased)
2190 return PyInt_FromLong(0);
2191 previous_is_cased = 1;
2192 cased = 1;
2193 }
2194 else
2195 previous_is_cased = 0;
2196 }
2197 return PyInt_FromLong(cased);
2198}
2199
2200
2201static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002202"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203\n\
2204Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002205Line breaks are not included in the resulting list unless keepends\n\
2206is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207
2208#define SPLIT_APPEND(data, left, right) \
2209 str = PyString_FromStringAndSize(data + left, right - left); \
2210 if (!str) \
2211 goto onError; \
2212 if (PyList_Append(list, str)) { \
2213 Py_DECREF(str); \
2214 goto onError; \
2215 } \
2216 else \
2217 Py_DECREF(str);
2218
2219static PyObject*
2220string_splitlines(PyStringObject *self, PyObject *args)
2221{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222 register int i;
2223 register int j;
2224 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002225 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 PyObject *list;
2227 PyObject *str;
2228 char *data;
2229
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002230 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002231 return NULL;
2232
2233 data = PyString_AS_STRING(self);
2234 len = PyString_GET_SIZE(self);
2235
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 list = PyList_New(0);
2237 if (!list)
2238 goto onError;
2239
2240 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002241 int eol;
2242
Guido van Rossum4c08d552000-03-10 22:55:18 +00002243 /* Find a line and append it */
2244 while (i < len && data[i] != '\n' && data[i] != '\r')
2245 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246
2247 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002248 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249 if (i < len) {
2250 if (data[i] == '\r' && i + 1 < len &&
2251 data[i+1] == '\n')
2252 i += 2;
2253 else
2254 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002255 if (keepends)
2256 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002258 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 j = i;
2260 }
2261 if (j < len) {
2262 SPLIT_APPEND(data, j, len);
2263 }
2264
2265 return list;
2266
2267 onError:
2268 Py_DECREF(list);
2269 return NULL;
2270}
2271
2272#undef SPLIT_APPEND
2273
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274
2275static PyMethodDef
2276string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002277 /* Counterparts of the obsolete stropmodule functions; except
2278 string.maketrans(). */
2279 {"join", (PyCFunction)string_join, 1, join__doc__},
2280 {"split", (PyCFunction)string_split, 1, split__doc__},
2281 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2282 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2283 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2284 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2285 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2286 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2287 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002288 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2289 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2291 {"count", (PyCFunction)string_count, 1, count__doc__},
2292 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2293 {"find", (PyCFunction)string_find, 1, find__doc__},
2294 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2297 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2298 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2299 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2301 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2302 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2304 {"title", (PyCFunction)string_title, 1, title__doc__},
2305 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2306 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2307 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002308 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2310 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2311#if 0
2312 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2313#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314 {NULL, NULL} /* sentinel */
2315};
2316
2317static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002318string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319{
2320 return Py_FindMethod(string_methods, (PyObject*)s, name);
2321}
2322
2323
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002324PyTypeObject PyString_Type = {
2325 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002326 0,
2327 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002328 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002329 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002330 (destructor)string_dealloc, /*tp_dealloc*/
2331 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002333 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002334 (cmpfunc)string_compare, /*tp_compare*/
2335 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002336 0, /*tp_as_number*/
2337 &string_as_sequence, /*tp_as_sequence*/
2338 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002339 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002340 0, /*tp_call*/
2341 0, /*tp_str*/
2342 0, /*tp_getattro*/
2343 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002344 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002345 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002346 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002347};
2348
2349void
Fred Drakeba096332000-07-09 07:04:36 +00002350PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002351{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002352 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002353 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002354 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002355 if (w == NULL || !PyString_Check(*pv)) {
2356 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002357 *pv = NULL;
2358 return;
2359 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002360 v = string_concat((PyStringObject *) *pv, w);
2361 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002362 *pv = v;
2363}
2364
Guido van Rossum013142a1994-08-30 08:19:36 +00002365void
Fred Drakeba096332000-07-09 07:04:36 +00002366PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002367{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002368 PyString_Concat(pv, w);
2369 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002370}
2371
2372
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002373/* The following function breaks the notion that strings are immutable:
2374 it changes the size of a string. We get away with this only if there
2375 is only one module referencing the object. You can also think of it
2376 as creating a new string object and destroying the old one, only
2377 more efficiently. In any case, don't use this if the string may
2378 already be known to some other part of the code... */
2379
2380int
Fred Drakeba096332000-07-09 07:04:36 +00002381_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002382{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002383 register PyObject *v;
2384 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002385 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002386 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002387 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002388 Py_DECREF(v);
2389 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002390 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002391 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002392 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002393#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002394 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002395#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002396 _Py_ForgetReference(v);
2397 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002398 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002399 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002400 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002401 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002402 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002403 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002404 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002405 _Py_NewReference(*pv);
2406 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002407 sv->ob_size = newsize;
2408 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002409 return 0;
2410}
Guido van Rossume5372401993-03-16 12:15:04 +00002411
2412/* Helpers for formatstring */
2413
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002414static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002415getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002416{
2417 int argidx = *p_argidx;
2418 if (argidx < arglen) {
2419 (*p_argidx)++;
2420 if (arglen < 0)
2421 return args;
2422 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002423 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002424 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002425 PyErr_SetString(PyExc_TypeError,
2426 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002427 return NULL;
2428}
2429
/* Bit flags passed as `flags` to the format helpers; each one
   occupies its own bit so they can be or-ed together. */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
2435
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002436static int
Fred Drakeba096332000-07-09 07:04:36 +00002437formatfloat(char *buf, size_t buflen, int flags,
2438 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002439{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002440 /* fmt = '%#.' + `prec` + `type`
2441 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002442 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002443 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002444 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002445 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002446 if (prec < 0)
2447 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002448 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2449 type = 'g';
2450 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002451 /* worst case length calc to ensure no buffer overrun:
2452 fmt = %#.<prec>g
2453 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2454 for any double rep.)
2455 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2456 If prec=0 the effective precision is 1 (the leading digit is
2457 always given), therefore increase by one to 10+prec. */
2458 if (buflen <= (size_t)10 + (size_t)prec) {
2459 PyErr_SetString(PyExc_OverflowError,
2460 "formatted float is too long (precision too long?)");
2461 return -1;
2462 }
Guido van Rossume5372401993-03-16 12:15:04 +00002463 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002464 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002465}
2466
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002467static int
Fred Drakeba096332000-07-09 07:04:36 +00002468formatint(char *buf, size_t buflen, int flags,
2469 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002470{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002471 /* fmt = '%#.' + `prec` + 'l' + `type`
2472 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002473 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002474 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002475 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002476 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002477 if (prec < 0)
2478 prec = 1;
2479 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002480 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2481 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2482 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2483 PyErr_SetString(PyExc_OverflowError,
2484 "formatted integer is too long (precision too long?)");
2485 return -1;
2486 }
Guido van Rossume5372401993-03-16 12:15:04 +00002487 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002488 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002489}
2490
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002491static int
Fred Drakeba096332000-07-09 07:04:36 +00002492formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002493{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002494 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002495 if (PyString_Check(v)) {
2496 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002497 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002498 }
2499 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002500 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002501 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002502 }
2503 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002504 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002505}
2506
Guido van Rossum013142a1994-08-30 08:19:36 +00002507
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002508/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2509
2510 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2511 chars are formatted. XXX This is a magic number. Each formatting
2512 routine does bounds checking to ensure no overflow, but a better
2513 solution may be to malloc a buffer of appropriate size for each
2514 format. For now, the current solution is sufficient.
2515*/
2516#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00002517
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002518PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002519PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002520{
2521 char *fmt, *res;
2522 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002523 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002524 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002525 PyObject *dict = NULL;
2526 if (format == NULL || !PyString_Check(format) || args == NULL) {
2527 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002528 return NULL;
2529 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002530 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002531 fmt = PyString_AsString(format);
2532 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002533 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002534 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002535 if (result == NULL)
2536 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002537 res = PyString_AsString(result);
2538 if (PyTuple_Check(args)) {
2539 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002540 argidx = 0;
2541 }
2542 else {
2543 arglen = -1;
2544 argidx = -2;
2545 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002546 if (args->ob_type->tp_as_mapping)
2547 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002548 while (--fmtcnt >= 0) {
2549 if (*fmt != '%') {
2550 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002551 rescnt = fmtcnt + 100;
2552 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002553 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002554 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002555 res = PyString_AsString(result)
2556 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002557 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002558 }
2559 *res++ = *fmt++;
2560 }
2561 else {
2562 /* Got a format specifier */
2563 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002564 int width = -1;
2565 int prec = -1;
2566 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002567 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002568 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002569 PyObject *v = NULL;
2570 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002571 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002572 int sign;
2573 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002574 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002575 char *fmt_start = fmt;
2576
Guido van Rossumda9c2711996-12-05 21:58:58 +00002577 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002578 if (*fmt == '(') {
2579 char *keystart;
2580 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002581 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002582 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002583
2584 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002585 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002586 "format requires a mapping");
2587 goto error;
2588 }
2589 ++fmt;
2590 --fmtcnt;
2591 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002592 /* Skip over balanced parentheses */
2593 while (pcount > 0 && --fmtcnt >= 0) {
2594 if (*fmt == ')')
2595 --pcount;
2596 else if (*fmt == '(')
2597 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002598 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002599 }
2600 keylen = fmt - keystart - 1;
2601 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002602 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002603 "incomplete format key");
2604 goto error;
2605 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002606 key = PyString_FromStringAndSize(keystart,
2607 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002608 if (key == NULL)
2609 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002610 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002611 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002612 args_owned = 0;
2613 }
2614 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002615 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002616 if (args == NULL) {
2617 goto error;
2618 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002619 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002620 arglen = -1;
2621 argidx = -2;
2622 }
Guido van Rossume5372401993-03-16 12:15:04 +00002623 while (--fmtcnt >= 0) {
2624 switch (c = *fmt++) {
2625 case '-': flags |= F_LJUST; continue;
2626 case '+': flags |= F_SIGN; continue;
2627 case ' ': flags |= F_BLANK; continue;
2628 case '#': flags |= F_ALT; continue;
2629 case '0': flags |= F_ZERO; continue;
2630 }
2631 break;
2632 }
2633 if (c == '*') {
2634 v = getnextarg(args, arglen, &argidx);
2635 if (v == NULL)
2636 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002637 if (!PyInt_Check(v)) {
2638 PyErr_SetString(PyExc_TypeError,
2639 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002640 goto error;
2641 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002642 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002643 if (width < 0) {
2644 flags |= F_LJUST;
2645 width = -width;
2646 }
Guido van Rossume5372401993-03-16 12:15:04 +00002647 if (--fmtcnt >= 0)
2648 c = *fmt++;
2649 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002650 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002651 width = c - '0';
2652 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002653 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002654 if (!isdigit(c))
2655 break;
2656 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002657 PyErr_SetString(
2658 PyExc_ValueError,
2659 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002660 goto error;
2661 }
2662 width = width*10 + (c - '0');
2663 }
2664 }
2665 if (c == '.') {
2666 prec = 0;
2667 if (--fmtcnt >= 0)
2668 c = *fmt++;
2669 if (c == '*') {
2670 v = getnextarg(args, arglen, &argidx);
2671 if (v == NULL)
2672 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002673 if (!PyInt_Check(v)) {
2674 PyErr_SetString(
2675 PyExc_TypeError,
2676 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002677 goto error;
2678 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002679 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002680 if (prec < 0)
2681 prec = 0;
2682 if (--fmtcnt >= 0)
2683 c = *fmt++;
2684 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002685 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002686 prec = c - '0';
2687 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002688 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002689 if (!isdigit(c))
2690 break;
2691 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002692 PyErr_SetString(
2693 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002694 "prec too big");
2695 goto error;
2696 }
2697 prec = prec*10 + (c - '0');
2698 }
2699 }
2700 } /* prec */
2701 if (fmtcnt >= 0) {
2702 if (c == 'h' || c == 'l' || c == 'L') {
2703 size = c;
2704 if (--fmtcnt >= 0)
2705 c = *fmt++;
2706 }
2707 }
2708 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002709 PyErr_SetString(PyExc_ValueError,
2710 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002711 goto error;
2712 }
2713 if (c != '%') {
2714 v = getnextarg(args, arglen, &argidx);
2715 if (v == NULL)
2716 goto error;
2717 }
2718 sign = 0;
2719 fill = ' ';
2720 switch (c) {
2721 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002722 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002723 len = 1;
2724 break;
2725 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002726 case 'r':
2727 if (PyUnicode_Check(v)) {
2728 fmt = fmt_start;
2729 goto unicode;
2730 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002731 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002732 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002733 else
2734 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002735 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002736 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002737 if (!PyString_Check(temp)) {
2738 PyErr_SetString(PyExc_TypeError,
2739 "%s argument has non-string str()");
2740 goto error;
2741 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002742 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002743 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002744 if (prec >= 0 && len > prec)
2745 len = prec;
2746 break;
2747 case 'i':
2748 case 'd':
2749 case 'u':
2750 case 'o':
2751 case 'x':
2752 case 'X':
2753 if (c == 'i')
2754 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002755 pbuf = formatbuf;
2756 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002757 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002758 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002759 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002760 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002761 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002762 if ((flags&F_ALT) &&
2763 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002764 pbuf[0] == '0' && pbuf[1] == c) {
2765 *res++ = *pbuf++;
2766 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002767 rescnt -= 2;
2768 len -= 2;
2769 width -= 2;
2770 if (width < 0)
2771 width = 0;
2772 }
2773 }
Guido van Rossume5372401993-03-16 12:15:04 +00002774 break;
2775 case 'e':
2776 case 'E':
2777 case 'f':
2778 case 'g':
2779 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002780 pbuf = formatbuf;
2781 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002782 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002783 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002784 sign = 1;
2785 if (flags&F_ZERO)
2786 fill = '0';
2787 break;
2788 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002789 pbuf = formatbuf;
2790 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002791 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002792 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002793 break;
2794 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002795 PyErr_Format(PyExc_ValueError,
2796 "unsupported format character '%c' (0x%x)",
2797 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002798 goto error;
2799 }
2800 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002801 if (*pbuf == '-' || *pbuf == '+') {
2802 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002803 len--;
2804 }
2805 else if (flags & F_SIGN)
2806 sign = '+';
2807 else if (flags & F_BLANK)
2808 sign = ' ';
2809 else
2810 sign = '\0';
2811 }
2812 if (width < len)
2813 width = len;
2814 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002815 reslen -= rescnt;
2816 rescnt = width + fmtcnt + 100;
2817 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002818 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002819 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002820 res = PyString_AsString(result)
2821 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002822 }
2823 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002824 if (fill != ' ')
2825 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002826 rescnt--;
2827 if (width > len)
2828 width--;
2829 }
2830 if (width > len && !(flags&F_LJUST)) {
2831 do {
2832 --rescnt;
2833 *res++ = fill;
2834 } while (--width > len);
2835 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002836 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002837 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002838 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002839 res += len;
2840 rescnt -= len;
2841 while (--width >= len) {
2842 --rescnt;
2843 *res++ = ' ';
2844 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002845 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002846 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002847 "not all arguments converted");
2848 goto error;
2849 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002850 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002851 } /* '%' */
2852 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002853 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002854 PyErr_SetString(PyExc_TypeError,
2855 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002856 goto error;
2857 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002858 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002859 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002860 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002861 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002862 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002863
2864 unicode:
2865 if (args_owned) {
2866 Py_DECREF(args);
2867 args_owned = 0;
2868 }
2869 /* Fiddle args right (remove the first argidx-1 arguments) */
2870 --argidx;
2871 if (PyTuple_Check(orig_args) && argidx > 0) {
2872 PyObject *v;
2873 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2874 v = PyTuple_New(n);
2875 if (v == NULL)
2876 goto error;
2877 while (--n >= 0) {
2878 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2879 Py_INCREF(w);
2880 PyTuple_SET_ITEM(v, n, w);
2881 }
2882 args = v;
2883 } else {
2884 Py_INCREF(orig_args);
2885 args = orig_args;
2886 }
2887 /* Paste rest of format string to what we have of the result
2888 string; we reuse result for this */
2889 rescnt = res - PyString_AS_STRING(result);
2890 fmtcnt = PyString_GET_SIZE(format) - \
2891 (fmt - PyString_AS_STRING(format));
2892 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2893 Py_DECREF(args);
2894 goto error;
2895 }
2896 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2897 format = result;
2898 /* Let Unicode do its magic */
2899 result = PyUnicode_Format(format, args);
2900 Py_DECREF(format);
2901 Py_DECREF(args);
2902 return result;
2903
Guido van Rossume5372401993-03-16 12:15:04 +00002904 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002905 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002906 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002907 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002908 }
Guido van Rossume5372401993-03-16 12:15:04 +00002909 return NULL;
2910}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002911
2912
2913#ifdef INTERN_STRINGS
2914
Barry Warsaw4df762f2000-08-16 23:41:01 +00002915/* This dictionary will leak at PyString_Fini() time. That's acceptable
2916 * because PyString_Fini() specifically frees interned strings that are
2917 * only referenced by this dictionary. The CVS log entry for revision 2.45
2918 * says:
2919 *
2920 * Change the Fini function to only remove otherwise unreferenced
2921 * strings from the interned table. There are references in
2922 * hard-to-find static variables all over the interpreter, and it's not
2923 * worth trying to get rid of all those; but "uninterning" isn't fair
2924 * either and may cause subtle failures later -- so we have to keep them
2925 * in the interned table.
2926 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00002927static PyObject *interned;
2928
2929void
Fred Drakeba096332000-07-09 07:04:36 +00002930PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002931{
2932 register PyStringObject *s = (PyStringObject *)(*p);
2933 PyObject *t;
2934 if (s == NULL || !PyString_Check(s))
2935 Py_FatalError("PyString_InternInPlace: strings only please!");
2936 if ((t = s->ob_sinterned) != NULL) {
2937 if (t == (PyObject *)s)
2938 return;
2939 Py_INCREF(t);
2940 *p = t;
2941 Py_DECREF(s);
2942 return;
2943 }
2944 if (interned == NULL) {
2945 interned = PyDict_New();
2946 if (interned == NULL)
2947 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002948 }
2949 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2950 Py_INCREF(t);
2951 *p = s->ob_sinterned = t;
2952 Py_DECREF(s);
2953 return;
2954 }
2955 t = (PyObject *)s;
2956 if (PyDict_SetItem(interned, t, t) == 0) {
2957 s->ob_sinterned = t;
2958 return;
2959 }
2960 PyErr_Clear();
2961}
2962
2963
2964PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002965PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002966{
2967 PyObject *s = PyString_FromString(cp);
2968 if (s == NULL)
2969 return NULL;
2970 PyString_InternInPlace(&s);
2971 return s;
2972}
2973
2974#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002975
2976void
Fred Drakeba096332000-07-09 07:04:36 +00002977PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00002978{
2979 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002980 for (i = 0; i < UCHAR_MAX + 1; i++) {
2981 Py_XDECREF(characters[i]);
2982 characters[i] = NULL;
2983 }
2984#ifndef DONT_SHARE_SHORT_STRINGS
2985 Py_XDECREF(nullstring);
2986 nullstring = NULL;
2987#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002988#ifdef INTERN_STRINGS
2989 if (interned) {
2990 int pos, changed;
2991 PyObject *key, *value;
2992 do {
2993 changed = 0;
2994 pos = 0;
2995 while (PyDict_Next(interned, &pos, &key, &value)) {
2996 if (key->ob_refcnt == 2 && key == value) {
2997 PyDict_DelItem(interned, key);
2998 changed = 1;
2999 }
3000 }
3001 } while (changed);
3002 }
3003#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003004}