blob: eee355173a7b9bb1905137ccaaf37fcf781c9855 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Guido van Rossum03093a21994-09-28 15:51:32 +000012#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#include <limits.h>
14#else
15#ifndef UCHAR_MAX
16#define UCHAR_MAX 255
17#endif
18#endif
19
Guido van Rossumc0b618a1997-05-02 03:12:38 +000020static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000021#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000022static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000023#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000024
/*
   PyString_FromStringAndSize() and PyString_FromString() try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000040PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000041PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000042{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000043 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000044#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000045 if (size == 0 && (op = nullstring) != NULL) {
46#ifdef COUNT_ALLOCS
47 null_strings++;
48#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000049 Py_INCREF(op);
50 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 if (size == 1 && str != NULL &&
53 (op = characters[*str & UCHAR_MAX]) != NULL)
54 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055#ifdef COUNT_ALLOCS
56 one_strings++;
57#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000058 Py_INCREF(op);
59 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000060 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000061#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000062
63 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000065 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000066 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000068 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef CACHE_HASH
70 op->ob_shash = -1;
71#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000072#ifdef INTERN_STRINGS
73 op->ob_sinterned = NULL;
74#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (str != NULL)
76 memcpy(op->ob_sval, str, size);
77 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000078#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 if (size == 0) {
80 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 } else if (size == 1 && str != NULL) {
83 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000084 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000086#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000087 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088}
89
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000091PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000093 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 if (size > INT_MAX) {
96 PyErr_SetString(PyExc_OverflowError,
97 "string is too long for a Python string");
98 return NULL;
99 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000100#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 if (size == 0 && (op = nullstring) != NULL) {
102#ifdef COUNT_ALLOCS
103 null_strings++;
104#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105 Py_INCREF(op);
106 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 }
108 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
109#ifdef COUNT_ALLOCS
110 one_strings++;
111#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000112 Py_INCREF(op);
113 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000115#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000116
117 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000119 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000120 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000122 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123#ifdef CACHE_HASH
124 op->ob_shash = -1;
125#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000126#ifdef INTERN_STRINGS
127 op->ob_sinterned = NULL;
128#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000130#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
132 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 } else if (size == 1) {
135 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000138#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000139 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140}
141
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000142PyObject *PyString_Decode(const char *s,
143 int size,
144 const char *encoding,
145 const char *errors)
146{
147 PyObject *buffer = NULL, *str;
148
149 if (encoding == NULL)
150 encoding = PyUnicode_GetDefaultEncoding();
151
152 /* Decode via the codec registry */
153 buffer = PyBuffer_FromMemory((void *)s, size);
154 if (buffer == NULL)
155 goto onError;
156 str = PyCodec_Decode(buffer, encoding, errors);
157 if (str == NULL)
158 goto onError;
159 /* Convert Unicode to a string using the default encoding */
160 if (PyUnicode_Check(str)) {
161 PyObject *temp = str;
162 str = PyUnicode_AsEncodedString(str, NULL, NULL);
163 Py_DECREF(temp);
164 if (str == NULL)
165 goto onError;
166 }
167 if (!PyString_Check(str)) {
168 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000169 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000170 str->ob_type->tp_name);
171 Py_DECREF(str);
172 goto onError;
173 }
174 Py_DECREF(buffer);
175 return str;
176
177 onError:
178 Py_XDECREF(buffer);
179 return NULL;
180}
181
182PyObject *PyString_Encode(const char *s,
183 int size,
184 const char *encoding,
185 const char *errors)
186{
187 PyObject *v, *str;
188
189 str = PyString_FromStringAndSize(s, size);
190 if (str == NULL)
191 return NULL;
192 v = PyString_AsEncodedString(str, encoding, errors);
193 Py_DECREF(str);
194 return v;
195}
196
197PyObject *PyString_AsEncodedString(PyObject *str,
198 const char *encoding,
199 const char *errors)
200{
201 PyObject *v;
202
203 if (!PyString_Check(str)) {
204 PyErr_BadArgument();
205 goto onError;
206 }
207
208 if (encoding == NULL)
209 encoding = PyUnicode_GetDefaultEncoding();
210
211 /* Encode via the codec registry */
212 v = PyCodec_Encode(str, encoding, errors);
213 if (v == NULL)
214 goto onError;
215 /* Convert Unicode to a string using the default encoding */
216 if (PyUnicode_Check(v)) {
217 PyObject *temp = v;
218 v = PyUnicode_AsEncodedString(v, NULL, NULL);
219 Py_DECREF(temp);
220 if (v == NULL)
221 goto onError;
222 }
223 if (!PyString_Check(v)) {
224 PyErr_Format(PyExc_TypeError,
225 "encoder did not return a string object (type=%.400s)",
226 v->ob_type->tp_name);
227 Py_DECREF(v);
228 goto onError;
229 }
230 return v;
231
232 onError:
233 return NULL;
234}
235
Guido van Rossum234f9421993-06-17 12:35:49 +0000236static void
Fred Drakeba096332000-07-09 07:04:36 +0000237string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000238{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000239 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000240}
241
Guido van Rossumd7047b31995-01-02 19:07:15 +0000242int
Fred Drakeba096332000-07-09 07:04:36 +0000243PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000244{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000245 if (!PyString_Check(op)) {
246 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000247 return -1;
248 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000249 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000250}
251
252/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000253PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000255 if (!PyString_Check(op)) {
256 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000257 return NULL;
258 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000259 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262/* Methods */
263
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000264static int
Fred Drakeba096332000-07-09 07:04:36 +0000265string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000266{
267 int i;
268 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000269 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000270 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000273 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000274 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000275
Thomas Wouters7e474022000-07-16 12:04:32 +0000276 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000277 quote = '\'';
278 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
279 quote = '"';
280
281 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282 for (i = 0; i < op->ob_size; i++) {
283 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000284 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000285 fprintf(fp, "\\%c", c);
286 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000287 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000288 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000289 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000291 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000292 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000293}
294
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000295static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000296string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000298 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
299 PyObject *v;
300 if (newsize > INT_MAX) {
301 PyErr_SetString(PyExc_OverflowError,
302 "string is too large to make repr");
303 }
304 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000305 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000306 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307 }
308 else {
309 register int i;
310 register char c;
311 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000312 int quote;
313
Thomas Wouters7e474022000-07-16 12:04:32 +0000314 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000315 quote = '\'';
316 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
317 quote = '"';
318
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000319 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000320 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321 for (i = 0; i < op->ob_size; i++) {
322 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000323 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 *p++ = '\\', *p++ = c;
325 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000326 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000327 while (*p != '\0')
328 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000329 }
330 else
331 *p++ = c;
332 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000335 _PyString_Resize(
336 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000337 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339}
340
341static int
Fred Drakeba096332000-07-09 07:04:36 +0000342string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343{
344 return a->ob_size;
345}
346
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000348string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349{
350 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000351 register PyStringObject *op;
352 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000353 if (PyUnicode_Check(bb))
354 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000355 PyErr_Format(PyExc_TypeError,
356 "cannot add type \"%.200s\" to string",
357 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358 return NULL;
359 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000360#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000361 /* Optimize cases with empty left or right operand */
362 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000363 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000364 return bb;
365 }
366 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000367 Py_INCREF(a);
368 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000369 }
370 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000371 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000372 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000373 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000374 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000375 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000376 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000377#ifdef CACHE_HASH
378 op->ob_shash = -1;
379#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000380#ifdef INTERN_STRINGS
381 op->ob_sinterned = NULL;
382#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000383 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
384 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
385 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000386 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387#undef b
388}
389
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000390static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000391string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000392{
393 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000394 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000395 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000396 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397 if (n < 0)
398 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000399 /* watch out for overflows: the size can overflow int,
400 * and the # of bytes needed can overflow size_t
401 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000403 if (n && size / n != a->ob_size) {
404 PyErr_SetString(PyExc_OverflowError,
405 "repeated string is too long");
406 return NULL;
407 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000408 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409 Py_INCREF(a);
410 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000411 }
Tim Peters8f422462000-09-09 06:13:41 +0000412 nbytes = size * sizeof(char);
413 if (nbytes / sizeof(char) != (size_t)size ||
414 nbytes + sizeof(PyStringObject) <= nbytes) {
415 PyErr_SetString(PyExc_OverflowError,
416 "repeated string is too long");
417 return NULL;
418 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000419 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000420 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000421 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000422 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000423 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000424#ifdef CACHE_HASH
425 op->ob_shash = -1;
426#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000427#ifdef INTERN_STRINGS
428 op->ob_sinterned = NULL;
429#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000430 for (i = 0; i < size; i += a->ob_size)
431 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
432 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000434}
435
436/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
437
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000438static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000439string_slice(register PyStringObject *a, register int i, register int j)
440 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000441{
442 if (i < 0)
443 i = 0;
444 if (j < 0)
445 j = 0; /* Avoid signed/unsigned bug in next line */
446 if (j > a->ob_size)
447 j = a->ob_size;
448 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000449 Py_INCREF(a);
450 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000451 }
452 if (j < i)
453 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000454 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000455}
456
Guido van Rossum9284a572000-03-07 15:53:43 +0000457static int
Fred Drakeba096332000-07-09 07:04:36 +0000458string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000459{
460 register char *s, *end;
461 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000462 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000463 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000464 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000465 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000466 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000467 return -1;
468 }
469 c = PyString_AsString(el)[0];
470 s = PyString_AsString(a);
471 end = s + PyString_Size(a);
472 while (s < end) {
473 if (c == *s++)
474 return 1;
475 }
476 return 0;
477}
478
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000479static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000480string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000482 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000483 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000484 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000485 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000486 return NULL;
487 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000488 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000489 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000490#ifdef COUNT_ALLOCS
491 if (v != NULL)
492 one_strings++;
493#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000494 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000495 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000496 if (v == NULL)
497 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000498 characters[c] = (PyStringObject *) v;
499 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000500 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000501 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000502 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000503}
504
505static int
Fred Drakeba096332000-07-09 07:04:36 +0000506string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000507{
Guido van Rossum253919f1991-02-13 23:18:39 +0000508 int len_a = a->ob_size, len_b = b->ob_size;
509 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000510 int cmp;
511 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000512 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000513 if (cmp == 0)
514 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
515 if (cmp != 0)
516 return cmp;
517 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000518 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519}
520
Guido van Rossum9bfef441993-03-29 10:43:31 +0000521static long
Fred Drakeba096332000-07-09 07:04:36 +0000522string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000523{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000524 register int len;
525 register unsigned char *p;
526 register long x;
527
528#ifdef CACHE_HASH
529 if (a->ob_shash != -1)
530 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000531#ifdef INTERN_STRINGS
532 if (a->ob_sinterned != NULL)
533 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000534 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000535#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000536#endif
537 len = a->ob_size;
538 p = (unsigned char *) a->ob_sval;
539 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000540 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000541 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000542 x ^= a->ob_size;
543 if (x == -1)
544 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000545#ifdef CACHE_HASH
546 a->ob_shash = x;
547#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000548 return x;
549}
550
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000551static int
Fred Drakeba096332000-07-09 07:04:36 +0000552string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000553{
554 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000555 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000556 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000557 return -1;
558 }
559 *ptr = (void *)self->ob_sval;
560 return self->ob_size;
561}
562
563static int
Fred Drakeba096332000-07-09 07:04:36 +0000564string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000565{
Guido van Rossum045e6881997-09-08 18:30:11 +0000566 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000567 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000568 return -1;
569}
570
571static int
Fred Drakeba096332000-07-09 07:04:36 +0000572string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000573{
574 if ( lenp )
575 *lenp = self->ob_size;
576 return 1;
577}
578
Guido van Rossum1db70701998-10-08 02:18:52 +0000579static int
Fred Drakeba096332000-07-09 07:04:36 +0000580string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000581{
582 if ( index != 0 ) {
583 PyErr_SetString(PyExc_SystemError,
584 "accessing non-existent string segment");
585 return -1;
586 }
587 *ptr = self->ob_sval;
588 return self->ob_size;
589}
590
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000591static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000592 (inquiry)string_length, /*sq_length*/
593 (binaryfunc)string_concat, /*sq_concat*/
594 (intargfunc)string_repeat, /*sq_repeat*/
595 (intargfunc)string_item, /*sq_item*/
596 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000597 0, /*sq_ass_item*/
598 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000599 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000600};
601
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000602static PyBufferProcs string_as_buffer = {
603 (getreadbufferproc)string_buffer_getreadbuf,
604 (getwritebufferproc)string_buffer_getwritebuf,
605 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000606 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000607};
608
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000609
610
611#define LEFTSTRIP 0
612#define RIGHTSTRIP 1
613#define BOTHSTRIP 2
614
615
616static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000617split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000618{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000619 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000620 PyObject* item;
621 PyObject *list = PyList_New(0);
622
623 if (list == NULL)
624 return NULL;
625
Guido van Rossum4c08d552000-03-10 22:55:18 +0000626 for (i = j = 0; i < len; ) {
627 while (i < len && isspace(Py_CHARMASK(s[i])))
628 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000629 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000630 while (i < len && !isspace(Py_CHARMASK(s[i])))
631 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000632 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000633 if (maxsplit-- <= 0)
634 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000635 item = PyString_FromStringAndSize(s+j, (int)(i-j));
636 if (item == NULL)
637 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000638 err = PyList_Append(list, item);
639 Py_DECREF(item);
640 if (err < 0)
641 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000642 while (i < len && isspace(Py_CHARMASK(s[i])))
643 i++;
644 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000645 }
646 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000647 if (j < len) {
648 item = PyString_FromStringAndSize(s+j, (int)(len - j));
649 if (item == NULL)
650 goto finally;
651 err = PyList_Append(list, item);
652 Py_DECREF(item);
653 if (err < 0)
654 goto finally;
655 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000656 return list;
657 finally:
658 Py_DECREF(list);
659 return NULL;
660}
661
662
/* Documentation string for the split() method. */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000670
671static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000672string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000673{
674 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000675 int maxsplit = -1;
676 const char *s = PyString_AS_STRING(self), *sub;
677 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000678
Guido van Rossum4c08d552000-03-10 22:55:18 +0000679 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000680 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000681 if (maxsplit < 0)
682 maxsplit = INT_MAX;
683 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000684 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000685 if (PyString_Check(subobj)) {
686 sub = PyString_AS_STRING(subobj);
687 n = PyString_GET_SIZE(subobj);
688 }
689 else if (PyUnicode_Check(subobj))
690 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
691 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
692 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000693 if (n == 0) {
694 PyErr_SetString(PyExc_ValueError, "empty separator");
695 return NULL;
696 }
697
698 list = PyList_New(0);
699 if (list == NULL)
700 return NULL;
701
702 i = j = 0;
703 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000704 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000705 if (maxsplit-- <= 0)
706 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000707 item = PyString_FromStringAndSize(s+j, (int)(i-j));
708 if (item == NULL)
709 goto fail;
710 err = PyList_Append(list, item);
711 Py_DECREF(item);
712 if (err < 0)
713 goto fail;
714 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000715 }
716 else
717 i++;
718 }
719 item = PyString_FromStringAndSize(s+j, (int)(len-j));
720 if (item == NULL)
721 goto fail;
722 err = PyList_Append(list, item);
723 Py_DECREF(item);
724 if (err < 0)
725 goto fail;
726
727 return list;
728
729 fail:
730 Py_DECREF(list);
731 return NULL;
732}
733
734
735static char join__doc__[] =
736"S.join(sequence) -> string\n\
737\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000738Return a string which is the concatenation of the strings in the\n\
739sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000740
741static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000742string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000743{
744 char *sep = PyString_AS_STRING(self);
745 int seplen = PyString_GET_SIZE(self);
746 PyObject *res = NULL;
747 int reslen = 0;
748 char *p;
749 int seqlen = 0;
750 int sz = 100;
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000751 int i, slen, sz_incr;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000752 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000753
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000754 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000755 return NULL;
756
Barry Warsaw771d0672000-07-11 04:58:12 +0000757 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000758 if (PyErr_ExceptionMatches(PyExc_TypeError))
759 PyErr_Format(PyExc_TypeError,
760 "sequence expected, %.80s found",
761 orig->ob_type->tp_name);
762 return NULL;
763 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000764 /* From here on out, errors go through finally: for proper
765 * reference count manipulations.
766 */
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000767 seqlen = PySequence_Size(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000768 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000769 item = PySequence_Fast_GET_ITEM(seq, 0);
770 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000771 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000772 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000773 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000774
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000775 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000776 goto finally;
777
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000778 p = PyString_AS_STRING(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000779
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000780 for (i = 0; i < seqlen; i++) {
781 item = PySequence_Fast_GET_ITEM(seq, i);
782 if (!PyString_Check(item)){
783 if (PyUnicode_Check(item)) {
784 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000785 Py_DECREF(seq);
786 return PyUnicode_Join((PyObject *)self, seq);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000787 }
788 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000789 "sequence item %i: expected string,"
790 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000791 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000792 goto finally;
793 }
794 slen = PyString_GET_SIZE(item);
795 while (reslen + slen + seplen >= sz) {
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000796 /* at least double the size of the string */
797 sz_incr = slen + seplen > sz ? slen + seplen : sz;
798 if (_PyString_Resize(&res, sz + sz_incr)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000799 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000800 }
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000801 sz += sz_incr;
802 p = PyString_AS_STRING(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000803 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000804 if (i > 0) {
805 memcpy(p, sep, seplen);
806 p += seplen;
807 reslen += seplen;
808 }
809 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000810 p += slen;
811 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000812 }
813 if (_PyString_Resize(&res, reslen))
814 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000815 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000816 return res;
817
818 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000819 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000820 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821 return NULL;
822}
823
824
825
826static long
Fred Drakeba096332000-07-09 07:04:36 +0000827string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000828{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000829 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000830 int len = PyString_GET_SIZE(self);
831 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000832 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000833
Guido van Rossumc6821402000-05-08 14:08:05 +0000834 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
835 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000836 return -2;
837 if (PyString_Check(subobj)) {
838 sub = PyString_AS_STRING(subobj);
839 n = PyString_GET_SIZE(subobj);
840 }
841 else if (PyUnicode_Check(subobj))
842 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
843 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000844 return -2;
845
846 if (last > len)
847 last = len;
848 if (last < 0)
849 last += len;
850 if (last < 0)
851 last = 0;
852 if (i < 0)
853 i += len;
854 if (i < 0)
855 i = 0;
856
Guido van Rossum4c08d552000-03-10 22:55:18 +0000857 if (dir > 0) {
858 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000859 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000860 last -= n;
861 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000862 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000863 return (long)i;
864 }
865 else {
866 int j;
867
868 if (n == 0 && i <= last)
869 return (long)last;
870 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000871 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000872 return (long)j;
873 }
874
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875 return -1;
876}
877
878
879static char find__doc__[] =
880"S.find(sub [,start [,end]]) -> int\n\
881\n\
882Return the lowest index in S where substring sub is found,\n\
883such that sub is contained within s[start,end]. Optional\n\
884arguments start and end are interpreted as in slice notation.\n\
885\n\
886Return -1 on failure.";
887
888static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000889string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000890{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000892 if (result == -2)
893 return NULL;
894 return PyInt_FromLong(result);
895}
896
897
898static char index__doc__[] =
899"S.index(sub [,start [,end]]) -> int\n\
900\n\
901Like S.find() but raise ValueError when the substring is not found.";
902
903static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000904string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000905{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000906 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000907 if (result == -2)
908 return NULL;
909 if (result == -1) {
910 PyErr_SetString(PyExc_ValueError,
911 "substring not found in string.index");
912 return NULL;
913 }
914 return PyInt_FromLong(result);
915}
916
917
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000918static char rfind__doc__[] =
919"S.rfind(sub [,start [,end]]) -> int\n\
920\n\
921Return the highest index in S where substring sub is found,\n\
922such that sub is contained within s[start,end]. Optional\n\
923arguments start and end are interpreted as in slice notation.\n\
924\n\
925Return -1 on failure.";
926
927static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000928string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000929{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000930 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000931 if (result == -2)
932 return NULL;
933 return PyInt_FromLong(result);
934}
935
936
937static char rindex__doc__[] =
938"S.rindex(sub [,start [,end]]) -> int\n\
939\n\
940Like S.rfind() but raise ValueError when the substring is not found.";
941
942static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000943string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000944{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000945 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000946 if (result == -2)
947 return NULL;
948 if (result == -1) {
949 PyErr_SetString(PyExc_ValueError,
950 "substring not found in string.rindex");
951 return NULL;
952 }
953 return PyInt_FromLong(result);
954}
955
956
957static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000958do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000959{
960 char *s = PyString_AS_STRING(self);
961 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000962
Guido van Rossum43713e52000-02-29 13:59:29 +0000963 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000964 return NULL;
965
966 i = 0;
967 if (striptype != RIGHTSTRIP) {
968 while (i < len && isspace(Py_CHARMASK(s[i]))) {
969 i++;
970 }
971 }
972
973 j = len;
974 if (striptype != LEFTSTRIP) {
975 do {
976 j--;
977 } while (j >= i && isspace(Py_CHARMASK(s[j])));
978 j++;
979 }
980
981 if (i == 0 && j == len) {
982 Py_INCREF(self);
983 return (PyObject*)self;
984 }
985 else
986 return PyString_FromStringAndSize(s+i, j-i);
987}
988
989
990static char strip__doc__[] =
991"S.strip() -> string\n\
992\n\
993Return a copy of the string S with leading and trailing\n\
994whitespace removed.";
995
996static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000997string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000998{
999 return do_strip(self, args, BOTHSTRIP);
1000}
1001
1002
1003static char lstrip__doc__[] =
1004"S.lstrip() -> string\n\
1005\n\
1006Return a copy of the string S with leading whitespace removed.";
1007
1008static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001009string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001010{
1011 return do_strip(self, args, LEFTSTRIP);
1012}
1013
1014
1015static char rstrip__doc__[] =
1016"S.rstrip() -> string\n\
1017\n\
1018Return a copy of the string S with trailing whitespace removed.";
1019
1020static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001021string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001022{
1023 return do_strip(self, args, RIGHTSTRIP);
1024}
1025
1026
1027static char lower__doc__[] =
1028"S.lower() -> string\n\
1029\n\
1030Return a copy of the string S converted to lowercase.";
1031
1032static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001033string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001034{
1035 char *s = PyString_AS_STRING(self), *s_new;
1036 int i, n = PyString_GET_SIZE(self);
1037 PyObject *new;
1038
Guido van Rossum43713e52000-02-29 13:59:29 +00001039 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001040 return NULL;
1041 new = PyString_FromStringAndSize(NULL, n);
1042 if (new == NULL)
1043 return NULL;
1044 s_new = PyString_AsString(new);
1045 for (i = 0; i < n; i++) {
1046 int c = Py_CHARMASK(*s++);
1047 if (isupper(c)) {
1048 *s_new = tolower(c);
1049 } else
1050 *s_new = c;
1051 s_new++;
1052 }
1053 return new;
1054}
1055
1056
1057static char upper__doc__[] =
1058"S.upper() -> string\n\
1059\n\
1060Return a copy of the string S converted to uppercase.";
1061
1062static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001063string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001064{
1065 char *s = PyString_AS_STRING(self), *s_new;
1066 int i, n = PyString_GET_SIZE(self);
1067 PyObject *new;
1068
Guido van Rossum43713e52000-02-29 13:59:29 +00001069 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001070 return NULL;
1071 new = PyString_FromStringAndSize(NULL, n);
1072 if (new == NULL)
1073 return NULL;
1074 s_new = PyString_AsString(new);
1075 for (i = 0; i < n; i++) {
1076 int c = Py_CHARMASK(*s++);
1077 if (islower(c)) {
1078 *s_new = toupper(c);
1079 } else
1080 *s_new = c;
1081 s_new++;
1082 }
1083 return new;
1084}
1085
1086
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087static char title__doc__[] =
1088"S.title() -> string\n\
1089\n\
1090Return a titlecased version of S, i.e. words start with uppercase\n\
1091characters, all remaining cased characters have lowercase.";
1092
1093static PyObject*
1094string_title(PyUnicodeObject *self, PyObject *args)
1095{
1096 char *s = PyString_AS_STRING(self), *s_new;
1097 int i, n = PyString_GET_SIZE(self);
1098 int previous_is_cased = 0;
1099 PyObject *new;
1100
1101 if (!PyArg_ParseTuple(args, ":title"))
1102 return NULL;
1103 new = PyString_FromStringAndSize(NULL, n);
1104 if (new == NULL)
1105 return NULL;
1106 s_new = PyString_AsString(new);
1107 for (i = 0; i < n; i++) {
1108 int c = Py_CHARMASK(*s++);
1109 if (islower(c)) {
1110 if (!previous_is_cased)
1111 c = toupper(c);
1112 previous_is_cased = 1;
1113 } else if (isupper(c)) {
1114 if (previous_is_cased)
1115 c = tolower(c);
1116 previous_is_cased = 1;
1117 } else
1118 previous_is_cased = 0;
1119 *s_new++ = c;
1120 }
1121 return new;
1122}
1123
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001124static char capitalize__doc__[] =
1125"S.capitalize() -> string\n\
1126\n\
1127Return a copy of the string S with only its first character\n\
1128capitalized.";
1129
1130static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001131string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001132{
1133 char *s = PyString_AS_STRING(self), *s_new;
1134 int i, n = PyString_GET_SIZE(self);
1135 PyObject *new;
1136
Guido van Rossum43713e52000-02-29 13:59:29 +00001137 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138 return NULL;
1139 new = PyString_FromStringAndSize(NULL, n);
1140 if (new == NULL)
1141 return NULL;
1142 s_new = PyString_AsString(new);
1143 if (0 < n) {
1144 int c = Py_CHARMASK(*s++);
1145 if (islower(c))
1146 *s_new = toupper(c);
1147 else
1148 *s_new = c;
1149 s_new++;
1150 }
1151 for (i = 1; i < n; i++) {
1152 int c = Py_CHARMASK(*s++);
1153 if (isupper(c))
1154 *s_new = tolower(c);
1155 else
1156 *s_new = c;
1157 s_new++;
1158 }
1159 return new;
1160}
1161
1162
1163static char count__doc__[] =
1164"S.count(sub[, start[, end]]) -> int\n\
1165\n\
1166Return the number of occurrences of substring sub in string\n\
1167S[start:end]. Optional arguments start and end are\n\
1168interpreted as in slice notation.";
1169
1170static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001171string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001172{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001173 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001174 int len = PyString_GET_SIZE(self), n;
1175 int i = 0, last = INT_MAX;
1176 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001177 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001178
Guido van Rossumc6821402000-05-08 14:08:05 +00001179 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1180 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001181 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001182
Guido van Rossum4c08d552000-03-10 22:55:18 +00001183 if (PyString_Check(subobj)) {
1184 sub = PyString_AS_STRING(subobj);
1185 n = PyString_GET_SIZE(subobj);
1186 }
1187 else if (PyUnicode_Check(subobj))
1188 return PyInt_FromLong(
1189 PyUnicode_Count((PyObject *)self, subobj, i, last));
1190 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1191 return NULL;
1192
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001193 if (last > len)
1194 last = len;
1195 if (last < 0)
1196 last += len;
1197 if (last < 0)
1198 last = 0;
1199 if (i < 0)
1200 i += len;
1201 if (i < 0)
1202 i = 0;
1203 m = last + 1 - n;
1204 if (n == 0)
1205 return PyInt_FromLong((long) (m-i));
1206
1207 r = 0;
1208 while (i < m) {
1209 if (!memcmp(s+i, sub, n)) {
1210 r++;
1211 i += n;
1212 } else {
1213 i++;
1214 }
1215 }
1216 return PyInt_FromLong((long) r);
1217}
1218
1219
1220static char swapcase__doc__[] =
1221"S.swapcase() -> string\n\
1222\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001223Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001224converted to lowercase and vice versa.";
1225
1226static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001227string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001228{
1229 char *s = PyString_AS_STRING(self), *s_new;
1230 int i, n = PyString_GET_SIZE(self);
1231 PyObject *new;
1232
Guido van Rossum43713e52000-02-29 13:59:29 +00001233 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001234 return NULL;
1235 new = PyString_FromStringAndSize(NULL, n);
1236 if (new == NULL)
1237 return NULL;
1238 s_new = PyString_AsString(new);
1239 for (i = 0; i < n; i++) {
1240 int c = Py_CHARMASK(*s++);
1241 if (islower(c)) {
1242 *s_new = toupper(c);
1243 }
1244 else if (isupper(c)) {
1245 *s_new = tolower(c);
1246 }
1247 else
1248 *s_new = c;
1249 s_new++;
1250 }
1251 return new;
1252}
1253
1254
1255static char translate__doc__[] =
1256"S.translate(table [,deletechars]) -> string\n\
1257\n\
1258Return a copy of the string S, where all characters occurring\n\
1259in the optional argument deletechars are removed, and the\n\
1260remaining characters have been mapped through the given\n\
1261translation table, which must be a string of length 256.";
1262
1263static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001264string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001265{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001266 register char *input, *output;
1267 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001268 register int i, c, changed = 0;
1269 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001270 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001271 int inlen, tablen, dellen = 0;
1272 PyObject *result;
1273 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001274 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001275
Guido van Rossum4c08d552000-03-10 22:55:18 +00001276 if (!PyArg_ParseTuple(args, "O|O:translate",
1277 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001279
1280 if (PyString_Check(tableobj)) {
1281 table1 = PyString_AS_STRING(tableobj);
1282 tablen = PyString_GET_SIZE(tableobj);
1283 }
1284 else if (PyUnicode_Check(tableobj)) {
1285 /* Unicode .translate() does not support the deletechars
1286 parameter; instead a mapping to None will cause characters
1287 to be deleted. */
1288 if (delobj != NULL) {
1289 PyErr_SetString(PyExc_TypeError,
1290 "deletions are implemented differently for unicode");
1291 return NULL;
1292 }
1293 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1294 }
1295 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001296 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001297
1298 if (delobj != NULL) {
1299 if (PyString_Check(delobj)) {
1300 del_table = PyString_AS_STRING(delobj);
1301 dellen = PyString_GET_SIZE(delobj);
1302 }
1303 else if (PyUnicode_Check(delobj)) {
1304 PyErr_SetString(PyExc_TypeError,
1305 "deletions are implemented differently for unicode");
1306 return NULL;
1307 }
1308 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1309 return NULL;
1310
1311 if (tablen != 256) {
1312 PyErr_SetString(PyExc_ValueError,
1313 "translation table must be 256 characters long");
1314 return NULL;
1315 }
1316 }
1317 else {
1318 del_table = NULL;
1319 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320 }
1321
1322 table = table1;
1323 inlen = PyString_Size(input_obj);
1324 result = PyString_FromStringAndSize((char *)NULL, inlen);
1325 if (result == NULL)
1326 return NULL;
1327 output_start = output = PyString_AsString(result);
1328 input = PyString_AsString(input_obj);
1329
1330 if (dellen == 0) {
1331 /* If no deletions are required, use faster code */
1332 for (i = inlen; --i >= 0; ) {
1333 c = Py_CHARMASK(*input++);
1334 if (Py_CHARMASK((*output++ = table[c])) != c)
1335 changed = 1;
1336 }
1337 if (changed)
1338 return result;
1339 Py_DECREF(result);
1340 Py_INCREF(input_obj);
1341 return input_obj;
1342 }
1343
1344 for (i = 0; i < 256; i++)
1345 trans_table[i] = Py_CHARMASK(table[i]);
1346
1347 for (i = 0; i < dellen; i++)
1348 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1349
1350 for (i = inlen; --i >= 0; ) {
1351 c = Py_CHARMASK(*input++);
1352 if (trans_table[c] != -1)
1353 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1354 continue;
1355 changed = 1;
1356 }
1357 if (!changed) {
1358 Py_DECREF(result);
1359 Py_INCREF(input_obj);
1360 return input_obj;
1361 }
1362 /* Fix the size of the resulting string */
1363 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1364 return NULL;
1365 return result;
1366}
1367
1368
1369/* What follows is used for implementing replace(). Perry Stoll. */
1370
/*
  mymemfind

  Search the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT (a strstr() for arbitrary memory blocks).

  Returns the offset of the match within MEM, or -1 when there is no
  match, which is always the case when PAT is longer than MEM.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* a match cannot begin later than this offset */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1396
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right:
      mem=1111  and pat=11 gives 2;
      mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int at;

    for (; len >= 0; hits++) {
        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* resume the scan just past this match */
        mem += at + pat_len;
        len -= at + pat_len;
    }
    return hits;
}
1420
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "all of them".

   If STR is empty, if length of PAT is greater than length of STR, or
   if there are no occurrences of PAT to replace, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would
       not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Refuse results whose length cannot be represented; without this
       the computation below could overflow and under-allocate. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result may legitimately be empty (everything replaced by
       nothing).  Always request at least one byte so that a NULL
       return unambiguously means "out of memory". */
    new_s = (char *)PyMem_MALLOC((new_len > 0 ? new_len : 1));
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;  /* have to cast away constness here */
}
1498
1499
1500static char replace__doc__[] =
1501"S.replace (old, new[, maxsplit]) -> string\n\
1502\n\
1503Return a copy of string S with all occurrences of substring\n\
1504old replaced by new. If the optional argument maxsplit is\n\
1505given, only the first maxsplit occurrences are replaced.";
1506
1507static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001508string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510 const char *str = PyString_AS_STRING(self), *sub, *repl;
1511 char *new_s;
1512 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1513 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001515 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516
Guido van Rossum4c08d552000-03-10 22:55:18 +00001517 if (!PyArg_ParseTuple(args, "OO|i:replace",
1518 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001520
1521 if (PyString_Check(subobj)) {
1522 sub = PyString_AS_STRING(subobj);
1523 sub_len = PyString_GET_SIZE(subobj);
1524 }
1525 else if (PyUnicode_Check(subobj))
1526 return PyUnicode_Replace((PyObject *)self,
1527 subobj, replobj, count);
1528 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1529 return NULL;
1530
1531 if (PyString_Check(replobj)) {
1532 repl = PyString_AS_STRING(replobj);
1533 repl_len = PyString_GET_SIZE(replobj);
1534 }
1535 else if (PyUnicode_Check(replobj))
1536 return PyUnicode_Replace((PyObject *)self,
1537 subobj, replobj, count);
1538 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1539 return NULL;
1540
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001541 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001542 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001543 return NULL;
1544 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001545 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546 if (new_s == NULL) {
1547 PyErr_NoMemory();
1548 return NULL;
1549 }
1550 if (out_len == -1) {
1551 /* we're returning another reference to self */
1552 new = (PyObject*)self;
1553 Py_INCREF(new);
1554 }
1555 else {
1556 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001557 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001558 }
1559 return new;
1560}
1561
1562
1563static char startswith__doc__[] =
1564"S.startswith(prefix[, start[, end]]) -> int\n\
1565\n\
1566Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1567optional start, test S beginning at that position. With optional end, stop\n\
1568comparing S at that position.";
1569
1570static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001571string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001573 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001574 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576 int plen;
1577 int start = 0;
1578 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001579 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001580
Guido van Rossumc6821402000-05-08 14:08:05 +00001581 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1582 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001583 return NULL;
1584 if (PyString_Check(subobj)) {
1585 prefix = PyString_AS_STRING(subobj);
1586 plen = PyString_GET_SIZE(subobj);
1587 }
1588 else if (PyUnicode_Check(subobj))
1589 return PyInt_FromLong(
1590 PyUnicode_Tailmatch((PyObject *)self,
1591 subobj, start, end, -1));
1592 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593 return NULL;
1594
1595 /* adopt Java semantics for index out of range. it is legal for
1596 * offset to be == plen, but this only returns true if prefix is
1597 * the empty string.
1598 */
1599 if (start < 0 || start+plen > len)
1600 return PyInt_FromLong(0);
1601
1602 if (!memcmp(str+start, prefix, plen)) {
1603 /* did the match end after the specified end? */
1604 if (end < 0)
1605 return PyInt_FromLong(1);
1606 else if (end - start < plen)
1607 return PyInt_FromLong(0);
1608 else
1609 return PyInt_FromLong(1);
1610 }
1611 else return PyInt_FromLong(0);
1612}
1613
1614
1615static char endswith__doc__[] =
1616"S.endswith(suffix[, start[, end]]) -> int\n\
1617\n\
1618Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1619optional start, test S beginning at that position. With optional end, stop\n\
1620comparing S at that position.";
1621
1622static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001623string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001625 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001627 const char* suffix;
1628 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629 int start = 0;
1630 int end = -1;
1631 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001632 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633
Guido van Rossumc6821402000-05-08 14:08:05 +00001634 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1635 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001636 return NULL;
1637 if (PyString_Check(subobj)) {
1638 suffix = PyString_AS_STRING(subobj);
1639 slen = PyString_GET_SIZE(subobj);
1640 }
1641 else if (PyUnicode_Check(subobj))
1642 return PyInt_FromLong(
1643 PyUnicode_Tailmatch((PyObject *)self,
1644 subobj, start, end, +1));
1645 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646 return NULL;
1647
Guido van Rossum4c08d552000-03-10 22:55:18 +00001648 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649 return PyInt_FromLong(0);
1650
1651 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001652 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653
Guido van Rossum4c08d552000-03-10 22:55:18 +00001654 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655 return PyInt_FromLong(1);
1656 else return PyInt_FromLong(0);
1657}
1658
1659
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001660static char encode__doc__[] =
1661"S.encode([encoding[,errors]]) -> string\n\
1662\n\
1663Return an encoded string version of S. Default encoding is the current\n\
1664default string encoding. errors may be given to set a different error\n\
1665handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1666a ValueError. Other possible values are 'ignore' and 'replace'.";
1667
1668static PyObject *
1669string_encode(PyStringObject *self, PyObject *args)
1670{
1671 char *encoding = NULL;
1672 char *errors = NULL;
1673 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1674 return NULL;
1675 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1676}
1677
1678
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679static char expandtabs__doc__[] =
1680"S.expandtabs([tabsize]) -> string\n\
1681\n\
1682Return a copy of S where all tab characters are expanded using spaces.\n\
1683If tabsize is not given, a tab size of 8 characters is assumed.";
1684
1685static PyObject*
1686string_expandtabs(PyStringObject *self, PyObject *args)
1687{
1688 const char *e, *p;
1689 char *q;
1690 int i, j;
1691 PyObject *u;
1692 int tabsize = 8;
1693
1694 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1695 return NULL;
1696
Thomas Wouters7e474022000-07-16 12:04:32 +00001697 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698 i = j = 0;
1699 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1700 for (p = PyString_AS_STRING(self); p < e; p++)
1701 if (*p == '\t') {
1702 if (tabsize > 0)
1703 j += tabsize - (j % tabsize);
1704 }
1705 else {
1706 j++;
1707 if (*p == '\n' || *p == '\r') {
1708 i += j;
1709 j = 0;
1710 }
1711 }
1712
1713 /* Second pass: create output string and fill it */
1714 u = PyString_FromStringAndSize(NULL, i + j);
1715 if (!u)
1716 return NULL;
1717
1718 j = 0;
1719 q = PyString_AS_STRING(u);
1720
1721 for (p = PyString_AS_STRING(self); p < e; p++)
1722 if (*p == '\t') {
1723 if (tabsize > 0) {
1724 i = tabsize - (j % tabsize);
1725 j += i;
1726 while (i--)
1727 *q++ = ' ';
1728 }
1729 }
1730 else {
1731 j++;
1732 *q++ = *p;
1733 if (*p == '\n' || *p == '\r')
1734 j = 0;
1735 }
1736
1737 return u;
1738}
1739
1740static
1741PyObject *pad(PyStringObject *self,
1742 int left,
1743 int right,
1744 char fill)
1745{
1746 PyObject *u;
1747
1748 if (left < 0)
1749 left = 0;
1750 if (right < 0)
1751 right = 0;
1752
1753 if (left == 0 && right == 0) {
1754 Py_INCREF(self);
1755 return (PyObject *)self;
1756 }
1757
1758 u = PyString_FromStringAndSize(NULL,
1759 left + PyString_GET_SIZE(self) + right);
1760 if (u) {
1761 if (left)
1762 memset(PyString_AS_STRING(u), fill, left);
1763 memcpy(PyString_AS_STRING(u) + left,
1764 PyString_AS_STRING(self),
1765 PyString_GET_SIZE(self));
1766 if (right)
1767 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1768 fill, right);
1769 }
1770
1771 return u;
1772}
1773
1774static char ljust__doc__[] =
1775"S.ljust(width) -> string\n\
1776\n\
1777Return S left justified in a string of length width. Padding is\n\
1778done using spaces.";
1779
1780static PyObject *
1781string_ljust(PyStringObject *self, PyObject *args)
1782{
1783 int width;
1784 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1785 return NULL;
1786
1787 if (PyString_GET_SIZE(self) >= width) {
1788 Py_INCREF(self);
1789 return (PyObject*) self;
1790 }
1791
1792 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1793}
1794
1795
1796static char rjust__doc__[] =
1797"S.rjust(width) -> string\n\
1798\n\
1799Return S right justified in a string of length width. Padding is\n\
1800done using spaces.";
1801
1802static PyObject *
1803string_rjust(PyStringObject *self, PyObject *args)
1804{
1805 int width;
1806 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1807 return NULL;
1808
1809 if (PyString_GET_SIZE(self) >= width) {
1810 Py_INCREF(self);
1811 return (PyObject*) self;
1812 }
1813
1814 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1815}
1816
1817
1818static char center__doc__[] =
1819"S.center(width) -> string\n\
1820\n\
1821Return S centered in a string of length width. Padding is done\n\
1822using spaces.";
1823
1824static PyObject *
1825string_center(PyStringObject *self, PyObject *args)
1826{
1827 int marg, left;
1828 int width;
1829
1830 if (!PyArg_ParseTuple(args, "i:center", &width))
1831 return NULL;
1832
1833 if (PyString_GET_SIZE(self) >= width) {
1834 Py_INCREF(self);
1835 return (PyObject*) self;
1836 }
1837
1838 marg = width - PyString_GET_SIZE(self);
1839 left = marg / 2 + (marg & width & 1);
1840
1841 return pad(self, left, marg - left, ' ');
1842}
1843
#if 0
/* Disabled: this method is compiled out, and so is its entry in
   string_methods. */
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int zeros;
	char *chars;
	PyObject *padded;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	zeros = width - PyString_GET_SIZE(self);

	padded = pad(self, zeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* chars[zeros] is the first character of the original string */
	chars = PyString_AS_STRING(padded);
	switch (chars[zeros]) {
	case '+':
	case '-':
		/* move sign to beginning of string */
		chars[0] = chars[zeros];
		chars[zeros] = '0';
		break;
	default:
		break;
	}

	return padded;
}
#endif
1883
1884static char isspace__doc__[] =
1885"S.isspace() -> int\n\
1886\n\
1887Return 1 if there are only whitespace characters in S,\n\
18880 otherwise.";
1889
1890static PyObject*
1891string_isspace(PyStringObject *self, PyObject *args)
1892{
Fred Drakeba096332000-07-09 07:04:36 +00001893 register const unsigned char *p
1894 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001895 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001896
1897 if (!PyArg_NoArgs(args))
1898 return NULL;
1899
1900 /* Shortcut for single character strings */
1901 if (PyString_GET_SIZE(self) == 1 &&
1902 isspace(*p))
1903 return PyInt_FromLong(1);
1904
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001905 /* Special case for empty strings */
1906 if (PyString_GET_SIZE(self) == 0)
1907 return PyInt_FromLong(0);
1908
Guido van Rossum4c08d552000-03-10 22:55:18 +00001909 e = p + PyString_GET_SIZE(self);
1910 for (; p < e; p++) {
1911 if (!isspace(*p))
1912 return PyInt_FromLong(0);
1913 }
1914 return PyInt_FromLong(1);
1915}
1916
1917
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001918static char isalpha__doc__[] =
1919"S.isalpha() -> int\n\
1920\n\
1921Return 1 if all characters in S are alphabetic\n\
1922and there is at least one character in S, 0 otherwise.";
1923
1924static PyObject*
1925string_isalpha(PyUnicodeObject *self, PyObject *args)
1926{
Fred Drakeba096332000-07-09 07:04:36 +00001927 register const unsigned char *p
1928 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001929 register const unsigned char *e;
1930
1931 if (!PyArg_NoArgs(args))
1932 return NULL;
1933
1934 /* Shortcut for single character strings */
1935 if (PyString_GET_SIZE(self) == 1 &&
1936 isalpha(*p))
1937 return PyInt_FromLong(1);
1938
1939 /* Special case for empty strings */
1940 if (PyString_GET_SIZE(self) == 0)
1941 return PyInt_FromLong(0);
1942
1943 e = p + PyString_GET_SIZE(self);
1944 for (; p < e; p++) {
1945 if (!isalpha(*p))
1946 return PyInt_FromLong(0);
1947 }
1948 return PyInt_FromLong(1);
1949}
1950
1951
1952static char isalnum__doc__[] =
1953"S.isalnum() -> int\n\
1954\n\
1955Return 1 if all characters in S are alphanumeric\n\
1956and there is at least one character in S, 0 otherwise.";
1957
1958static PyObject*
1959string_isalnum(PyUnicodeObject *self, PyObject *args)
1960{
Fred Drakeba096332000-07-09 07:04:36 +00001961 register const unsigned char *p
1962 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001963 register const unsigned char *e;
1964
1965 if (!PyArg_NoArgs(args))
1966 return NULL;
1967
1968 /* Shortcut for single character strings */
1969 if (PyString_GET_SIZE(self) == 1 &&
1970 isalnum(*p))
1971 return PyInt_FromLong(1);
1972
1973 /* Special case for empty strings */
1974 if (PyString_GET_SIZE(self) == 0)
1975 return PyInt_FromLong(0);
1976
1977 e = p + PyString_GET_SIZE(self);
1978 for (; p < e; p++) {
1979 if (!isalnum(*p))
1980 return PyInt_FromLong(0);
1981 }
1982 return PyInt_FromLong(1);
1983}
1984
1985
Guido van Rossum4c08d552000-03-10 22:55:18 +00001986static char isdigit__doc__[] =
1987"S.isdigit() -> int\n\
1988\n\
1989Return 1 if there are only digit characters in S,\n\
19900 otherwise.";
1991
1992static PyObject*
1993string_isdigit(PyStringObject *self, PyObject *args)
1994{
Fred Drakeba096332000-07-09 07:04:36 +00001995 register const unsigned char *p
1996 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001997 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001998
1999 if (!PyArg_NoArgs(args))
2000 return NULL;
2001
2002 /* Shortcut for single character strings */
2003 if (PyString_GET_SIZE(self) == 1 &&
2004 isdigit(*p))
2005 return PyInt_FromLong(1);
2006
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002007 /* Special case for empty strings */
2008 if (PyString_GET_SIZE(self) == 0)
2009 return PyInt_FromLong(0);
2010
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011 e = p + PyString_GET_SIZE(self);
2012 for (; p < e; p++) {
2013 if (!isdigit(*p))
2014 return PyInt_FromLong(0);
2015 }
2016 return PyInt_FromLong(1);
2017}
2018
2019
2020static char islower__doc__[] =
2021"S.islower() -> int\n\
2022\n\
2023Return 1 if all cased characters in S are lowercase and there is\n\
2024at least one cased character in S, 0 otherwise.";
2025
2026static PyObject*
2027string_islower(PyStringObject *self, PyObject *args)
2028{
Fred Drakeba096332000-07-09 07:04:36 +00002029 register const unsigned char *p
2030 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002031 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002032 int cased;
2033
2034 if (!PyArg_NoArgs(args))
2035 return NULL;
2036
2037 /* Shortcut for single character strings */
2038 if (PyString_GET_SIZE(self) == 1)
2039 return PyInt_FromLong(islower(*p) != 0);
2040
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002041 /* Special case for empty strings */
2042 if (PyString_GET_SIZE(self) == 0)
2043 return PyInt_FromLong(0);
2044
Guido van Rossum4c08d552000-03-10 22:55:18 +00002045 e = p + PyString_GET_SIZE(self);
2046 cased = 0;
2047 for (; p < e; p++) {
2048 if (isupper(*p))
2049 return PyInt_FromLong(0);
2050 else if (!cased && islower(*p))
2051 cased = 1;
2052 }
2053 return PyInt_FromLong(cased);
2054}
2055
2056
2057static char isupper__doc__[] =
2058"S.isupper() -> int\n\
2059\n\
2060Return 1 if all cased characters in S are uppercase and there is\n\
2061at least one cased character in S, 0 otherwise.";
2062
2063static PyObject*
2064string_isupper(PyStringObject *self, PyObject *args)
2065{
Fred Drakeba096332000-07-09 07:04:36 +00002066 register const unsigned char *p
2067 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002068 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069 int cased;
2070
2071 if (!PyArg_NoArgs(args))
2072 return NULL;
2073
2074 /* Shortcut for single character strings */
2075 if (PyString_GET_SIZE(self) == 1)
2076 return PyInt_FromLong(isupper(*p) != 0);
2077
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002078 /* Special case for empty strings */
2079 if (PyString_GET_SIZE(self) == 0)
2080 return PyInt_FromLong(0);
2081
Guido van Rossum4c08d552000-03-10 22:55:18 +00002082 e = p + PyString_GET_SIZE(self);
2083 cased = 0;
2084 for (; p < e; p++) {
2085 if (islower(*p))
2086 return PyInt_FromLong(0);
2087 else if (!cased && isupper(*p))
2088 cased = 1;
2089 }
2090 return PyInt_FromLong(cased);
2091}
2092
2093
2094static char istitle__doc__[] =
2095"S.istitle() -> int\n\
2096\n\
2097Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2098may only follow uncased characters and lowercase characters only cased\n\
2099ones. Return 0 otherwise.";
2100
2101static PyObject*
2102string_istitle(PyStringObject *self, PyObject *args)
2103{
Fred Drakeba096332000-07-09 07:04:36 +00002104 register const unsigned char *p
2105 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002106 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 int cased, previous_is_cased;
2108
2109 if (!PyArg_NoArgs(args))
2110 return NULL;
2111
2112 /* Shortcut for single character strings */
2113 if (PyString_GET_SIZE(self) == 1)
2114 return PyInt_FromLong(isupper(*p) != 0);
2115
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002116 /* Special case for empty strings */
2117 if (PyString_GET_SIZE(self) == 0)
2118 return PyInt_FromLong(0);
2119
Guido van Rossum4c08d552000-03-10 22:55:18 +00002120 e = p + PyString_GET_SIZE(self);
2121 cased = 0;
2122 previous_is_cased = 0;
2123 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002124 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002125
2126 if (isupper(ch)) {
2127 if (previous_is_cased)
2128 return PyInt_FromLong(0);
2129 previous_is_cased = 1;
2130 cased = 1;
2131 }
2132 else if (islower(ch)) {
2133 if (!previous_is_cased)
2134 return PyInt_FromLong(0);
2135 previous_is_cased = 1;
2136 cased = 1;
2137 }
2138 else
2139 previous_is_cased = 0;
2140 }
2141 return PyInt_FromLong(cased);
2142}
2143
2144
2145static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002146"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002147\n\
2148Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002149Line breaks are not included in the resulting list unless keepends\n\
2150is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002151
2152#define SPLIT_APPEND(data, left, right) \
2153 str = PyString_FromStringAndSize(data + left, right - left); \
2154 if (!str) \
2155 goto onError; \
2156 if (PyList_Append(list, str)) { \
2157 Py_DECREF(str); \
2158 goto onError; \
2159 } \
2160 else \
2161 Py_DECREF(str);
2162
2163static PyObject*
2164string_splitlines(PyStringObject *self, PyObject *args)
2165{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002166 register int i;
2167 register int j;
2168 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002169 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002170 PyObject *list;
2171 PyObject *str;
2172 char *data;
2173
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002174 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175 return NULL;
2176
2177 data = PyString_AS_STRING(self);
2178 len = PyString_GET_SIZE(self);
2179
Guido van Rossum4c08d552000-03-10 22:55:18 +00002180 list = PyList_New(0);
2181 if (!list)
2182 goto onError;
2183
2184 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002185 int eol;
2186
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187 /* Find a line and append it */
2188 while (i < len && data[i] != '\n' && data[i] != '\r')
2189 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190
2191 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002192 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193 if (i < len) {
2194 if (data[i] == '\r' && i + 1 < len &&
2195 data[i+1] == '\n')
2196 i += 2;
2197 else
2198 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002199 if (keepends)
2200 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002202 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 j = i;
2204 }
2205 if (j < len) {
2206 SPLIT_APPEND(data, j, len);
2207 }
2208
2209 return list;
2210
2211 onError:
2212 Py_DECREF(list);
2213 return NULL;
2214}
2215
2216#undef SPLIT_APPEND
2217
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218
2219static PyMethodDef
2220string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221 /* Counterparts of the obsolete stropmodule functions; except
2222 string.maketrans(). */
2223 {"join", (PyCFunction)string_join, 1, join__doc__},
2224 {"split", (PyCFunction)string_split, 1, split__doc__},
2225 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2226 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2227 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2228 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2229 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2230 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2231 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002232 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2233 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2235 {"count", (PyCFunction)string_count, 1, count__doc__},
2236 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2237 {"find", (PyCFunction)string_find, 1, find__doc__},
2238 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2241 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2242 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2243 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2245 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2246 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2248 {"title", (PyCFunction)string_title, 1, title__doc__},
2249 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2250 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2251 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002252 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2254 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2255#if 0
2256 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2257#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258 {NULL, NULL} /* sentinel */
2259};
2260
2261static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002262string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263{
2264 return Py_FindMethod(string_methods, (PyObject*)s, name);
2265}
2266
2267
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002268PyTypeObject PyString_Type = {
2269 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002270 0,
2271 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002272 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002273 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002274 (destructor)string_dealloc, /*tp_dealloc*/
2275 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002276 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002277 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002278 (cmpfunc)string_compare, /*tp_compare*/
2279 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002280 0, /*tp_as_number*/
2281 &string_as_sequence, /*tp_as_sequence*/
2282 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002283 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002284 0, /*tp_call*/
2285 0, /*tp_str*/
2286 0, /*tp_getattro*/
2287 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002288 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002289 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002290 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002291};
2292
2293void
Fred Drakeba096332000-07-09 07:04:36 +00002294PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002295{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002296 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002297 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002298 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002299 if (w == NULL || !PyString_Check(*pv)) {
2300 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002301 *pv = NULL;
2302 return;
2303 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002304 v = string_concat((PyStringObject *) *pv, w);
2305 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002306 *pv = v;
2307}
2308
Guido van Rossum013142a1994-08-30 08:19:36 +00002309void
Fred Drakeba096332000-07-09 07:04:36 +00002310PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002311{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002312 PyString_Concat(pv, w);
2313 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002314}
2315
2316
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002317/* The following function breaks the notion that strings are immutable:
2318 it changes the size of a string. We get away with this only if there
2319 is only one module referencing the object. You can also think of it
2320 as creating a new string object and destroying the old one, only
2321 more efficiently. In any case, don't use this if the string may
2322 already be known to some other part of the code... */
2323
2324int
Fred Drakeba096332000-07-09 07:04:36 +00002325_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002326{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002327 register PyObject *v;
2328 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002329 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002330 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002331 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002332 Py_DECREF(v);
2333 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002334 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002335 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002336 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002337#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002338 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002339#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002340 _Py_ForgetReference(v);
2341 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002342 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002343 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002344 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002345 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002346 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002347 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002348 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002349 _Py_NewReference(*pv);
2350 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002351 sv->ob_size = newsize;
2352 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002353 return 0;
2354}
Guido van Rossume5372401993-03-16 12:15:04 +00002355
2356/* Helpers for formatstring */
2357
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002358static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002359getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002360{
2361 int argidx = *p_argidx;
2362 if (argidx < arglen) {
2363 (*p_argidx)++;
2364 if (arglen < 0)
2365 return args;
2366 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002367 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002368 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002369 PyErr_SetString(PyExc_TypeError,
2370 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002371 return NULL;
2372}
2373
/* Bit flags describing a format directive; each one occupies its own bit
   so they can be or-ed together.  F_ALT selects the '#' form in
   formatfloat() and formatint(); the others presumably mirror the
   printf flags '-', '+', ' ' and '0' -- confirm in PyString_Format(). */
#define F_LJUST	(1 << 0)
#define F_SIGN	(1 << 1)
#define F_BLANK	(1 << 2)
#define F_ALT	(1 << 3)
#define F_ZERO	(1 << 4)
2379
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002380static int
Fred Drakeba096332000-07-09 07:04:36 +00002381formatfloat(char *buf, size_t buflen, int flags,
2382 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002383{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002384 /* fmt = '%#.' + `prec` + `type`
2385 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002386 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002387 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002388 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002389 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002390 if (prec < 0)
2391 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002392 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2393 type = 'g';
2394 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002395 /* worst case length calc to ensure no buffer overrun:
2396 fmt = %#.<prec>g
2397 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2398 for any double rep.)
2399 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2400 If prec=0 the effective precision is 1 (the leading digit is
2401 always given), therefore increase by one to 10+prec. */
2402 if (buflen <= (size_t)10 + (size_t)prec) {
2403 PyErr_SetString(PyExc_OverflowError,
2404 "formatted float is too long (precision too long?)");
2405 return -1;
2406 }
Guido van Rossume5372401993-03-16 12:15:04 +00002407 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002408 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002409}
2410
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002411static int
Fred Drakeba096332000-07-09 07:04:36 +00002412formatint(char *buf, size_t buflen, int flags,
2413 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002414{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002415 /* fmt = '%#.' + `prec` + 'l' + `type`
2416 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002417 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002418 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002419 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002420 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002421 if (prec < 0)
2422 prec = 1;
2423 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002424 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2425 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2426 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2427 PyErr_SetString(PyExc_OverflowError,
2428 "formatted integer is too long (precision too long?)");
2429 return -1;
2430 }
Guido van Rossume5372401993-03-16 12:15:04 +00002431 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002432 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002433}
2434
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002435static int
Fred Drakeba096332000-07-09 07:04:36 +00002436formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002437{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002438 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002439 if (PyString_Check(v)) {
2440 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002441 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002442 }
2443 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002444 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002445 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002446 }
2447 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002448 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002449}
2450
Guido van Rossum013142a1994-08-30 08:19:36 +00002451
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002452/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2453
2454 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2455 chars are formatted. XXX This is a magic number. Each formatting
2456 routine does bounds checking to ensure no overflow, but a better
2457 solution may be to malloc a buffer of appropriate size for each
2458 format. For now, the current solution is sufficient.
2459*/
2460#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00002461
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002462PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002463PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002464{
2465 char *fmt, *res;
2466 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002467 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002468 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002469 PyObject *dict = NULL;
2470 if (format == NULL || !PyString_Check(format) || args == NULL) {
2471 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002472 return NULL;
2473 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002474 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002475 fmt = PyString_AsString(format);
2476 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002477 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002478 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002479 if (result == NULL)
2480 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002481 res = PyString_AsString(result);
2482 if (PyTuple_Check(args)) {
2483 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002484 argidx = 0;
2485 }
2486 else {
2487 arglen = -1;
2488 argidx = -2;
2489 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002490 if (args->ob_type->tp_as_mapping)
2491 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002492 while (--fmtcnt >= 0) {
2493 if (*fmt != '%') {
2494 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002495 rescnt = fmtcnt + 100;
2496 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002497 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002498 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002499 res = PyString_AsString(result)
2500 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002501 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002502 }
2503 *res++ = *fmt++;
2504 }
2505 else {
2506 /* Got a format specifier */
2507 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002508 int width = -1;
2509 int prec = -1;
2510 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002511 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002512 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002513 PyObject *v = NULL;
2514 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002515 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002516 int sign;
2517 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002518 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002519 char *fmt_start = fmt;
2520
Guido van Rossumda9c2711996-12-05 21:58:58 +00002521 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002522 if (*fmt == '(') {
2523 char *keystart;
2524 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002525 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002526 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002527
2528 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002529 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002530 "format requires a mapping");
2531 goto error;
2532 }
2533 ++fmt;
2534 --fmtcnt;
2535 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002536 /* Skip over balanced parentheses */
2537 while (pcount > 0 && --fmtcnt >= 0) {
2538 if (*fmt == ')')
2539 --pcount;
2540 else if (*fmt == '(')
2541 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002542 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002543 }
2544 keylen = fmt - keystart - 1;
2545 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002546 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002547 "incomplete format key");
2548 goto error;
2549 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002550 key = PyString_FromStringAndSize(keystart,
2551 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002552 if (key == NULL)
2553 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002554 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002555 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002556 args_owned = 0;
2557 }
2558 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002559 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002560 if (args == NULL) {
2561 goto error;
2562 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002563 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002564 arglen = -1;
2565 argidx = -2;
2566 }
Guido van Rossume5372401993-03-16 12:15:04 +00002567 while (--fmtcnt >= 0) {
2568 switch (c = *fmt++) {
2569 case '-': flags |= F_LJUST; continue;
2570 case '+': flags |= F_SIGN; continue;
2571 case ' ': flags |= F_BLANK; continue;
2572 case '#': flags |= F_ALT; continue;
2573 case '0': flags |= F_ZERO; continue;
2574 }
2575 break;
2576 }
2577 if (c == '*') {
2578 v = getnextarg(args, arglen, &argidx);
2579 if (v == NULL)
2580 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002581 if (!PyInt_Check(v)) {
2582 PyErr_SetString(PyExc_TypeError,
2583 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002584 goto error;
2585 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002586 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002587 if (width < 0) {
2588 flags |= F_LJUST;
2589 width = -width;
2590 }
Guido van Rossume5372401993-03-16 12:15:04 +00002591 if (--fmtcnt >= 0)
2592 c = *fmt++;
2593 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002594 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002595 width = c - '0';
2596 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002597 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002598 if (!isdigit(c))
2599 break;
2600 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002601 PyErr_SetString(
2602 PyExc_ValueError,
2603 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002604 goto error;
2605 }
2606 width = width*10 + (c - '0');
2607 }
2608 }
2609 if (c == '.') {
2610 prec = 0;
2611 if (--fmtcnt >= 0)
2612 c = *fmt++;
2613 if (c == '*') {
2614 v = getnextarg(args, arglen, &argidx);
2615 if (v == NULL)
2616 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002617 if (!PyInt_Check(v)) {
2618 PyErr_SetString(
2619 PyExc_TypeError,
2620 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002621 goto error;
2622 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002623 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002624 if (prec < 0)
2625 prec = 0;
2626 if (--fmtcnt >= 0)
2627 c = *fmt++;
2628 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002629 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002630 prec = c - '0';
2631 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002632 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002633 if (!isdigit(c))
2634 break;
2635 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002636 PyErr_SetString(
2637 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002638 "prec too big");
2639 goto error;
2640 }
2641 prec = prec*10 + (c - '0');
2642 }
2643 }
2644 } /* prec */
2645 if (fmtcnt >= 0) {
2646 if (c == 'h' || c == 'l' || c == 'L') {
2647 size = c;
2648 if (--fmtcnt >= 0)
2649 c = *fmt++;
2650 }
2651 }
2652 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002653 PyErr_SetString(PyExc_ValueError,
2654 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002655 goto error;
2656 }
2657 if (c != '%') {
2658 v = getnextarg(args, arglen, &argidx);
2659 if (v == NULL)
2660 goto error;
2661 }
2662 sign = 0;
2663 fill = ' ';
2664 switch (c) {
2665 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002666 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002667 len = 1;
2668 break;
2669 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002670 case 'r':
2671 if (PyUnicode_Check(v)) {
2672 fmt = fmt_start;
2673 goto unicode;
2674 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002675 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002676 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002677 else
2678 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002679 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002680 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002681 if (!PyString_Check(temp)) {
2682 PyErr_SetString(PyExc_TypeError,
2683 "%s argument has non-string str()");
2684 goto error;
2685 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002686 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002687 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002688 if (prec >= 0 && len > prec)
2689 len = prec;
2690 break;
2691 case 'i':
2692 case 'd':
2693 case 'u':
2694 case 'o':
2695 case 'x':
2696 case 'X':
2697 if (c == 'i')
2698 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002699 pbuf = formatbuf;
2700 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002701 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002702 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002703 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002704 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002705 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002706 if ((flags&F_ALT) &&
2707 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002708 pbuf[0] == '0' && pbuf[1] == c) {
2709 *res++ = *pbuf++;
2710 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002711 rescnt -= 2;
2712 len -= 2;
2713 width -= 2;
2714 if (width < 0)
2715 width = 0;
2716 }
2717 }
Guido van Rossume5372401993-03-16 12:15:04 +00002718 break;
2719 case 'e':
2720 case 'E':
2721 case 'f':
2722 case 'g':
2723 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002724 pbuf = formatbuf;
2725 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002726 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002727 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002728 sign = 1;
2729 if (flags&F_ZERO)
2730 fill = '0';
2731 break;
2732 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002733 pbuf = formatbuf;
2734 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002735 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002736 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002737 break;
2738 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002739 PyErr_Format(PyExc_ValueError,
2740 "unsupported format character '%c' (0x%x)",
2741 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002742 goto error;
2743 }
2744 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002745 if (*pbuf == '-' || *pbuf == '+') {
2746 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002747 len--;
2748 }
2749 else if (flags & F_SIGN)
2750 sign = '+';
2751 else if (flags & F_BLANK)
2752 sign = ' ';
2753 else
2754 sign = '\0';
2755 }
2756 if (width < len)
2757 width = len;
2758 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002759 reslen -= rescnt;
2760 rescnt = width + fmtcnt + 100;
2761 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002762 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002763 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002764 res = PyString_AsString(result)
2765 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002766 }
2767 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002768 if (fill != ' ')
2769 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002770 rescnt--;
2771 if (width > len)
2772 width--;
2773 }
2774 if (width > len && !(flags&F_LJUST)) {
2775 do {
2776 --rescnt;
2777 *res++ = fill;
2778 } while (--width > len);
2779 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002780 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002781 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002782 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002783 res += len;
2784 rescnt -= len;
2785 while (--width >= len) {
2786 --rescnt;
2787 *res++ = ' ';
2788 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002789 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002791 "not all arguments converted");
2792 goto error;
2793 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002794 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002795 } /* '%' */
2796 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002797 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 PyErr_SetString(PyExc_TypeError,
2799 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002800 goto error;
2801 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002802 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002803 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002804 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002805 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002806 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002807
2808 unicode:
2809 if (args_owned) {
2810 Py_DECREF(args);
2811 args_owned = 0;
2812 }
2813 /* Fiddle args right (remove the first argidx-1 arguments) */
2814 --argidx;
2815 if (PyTuple_Check(orig_args) && argidx > 0) {
2816 PyObject *v;
2817 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2818 v = PyTuple_New(n);
2819 if (v == NULL)
2820 goto error;
2821 while (--n >= 0) {
2822 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2823 Py_INCREF(w);
2824 PyTuple_SET_ITEM(v, n, w);
2825 }
2826 args = v;
2827 } else {
2828 Py_INCREF(orig_args);
2829 args = orig_args;
2830 }
2831 /* Paste rest of format string to what we have of the result
2832 string; we reuse result for this */
2833 rescnt = res - PyString_AS_STRING(result);
2834 fmtcnt = PyString_GET_SIZE(format) - \
2835 (fmt - PyString_AS_STRING(format));
2836 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2837 Py_DECREF(args);
2838 goto error;
2839 }
2840 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2841 format = result;
2842 /* Let Unicode do its magic */
2843 result = PyUnicode_Format(format, args);
2844 Py_DECREF(format);
2845 Py_DECREF(args);
2846 return result;
2847
Guido van Rossume5372401993-03-16 12:15:04 +00002848 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002849 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002850 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002851 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002852 }
Guido van Rossume5372401993-03-16 12:15:04 +00002853 return NULL;
2854}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002855
2856
2857#ifdef INTERN_STRINGS
2858
Barry Warsaw4df762f2000-08-16 23:41:01 +00002859/* This dictionary will leak at PyString_Fini() time. That's acceptable
2860 * because PyString_Fini() specifically frees interned strings that are
2861 * only referenced by this dictionary. The CVS log entry for revision 2.45
2862 * says:
2863 *
2864 * Change the Fini function to only remove otherwise unreferenced
2865 * strings from the interned table. There are references in
2866 * hard-to-find static variables all over the interpreter, and it's not
2867 * worth trying to get rid of all those; but "uninterning" isn't fair
2868 * either and may cause subtle failures later -- so we have to keep them
2869 * in the interned table.
2870 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00002871static PyObject *interned;
2872
2873void
Fred Drakeba096332000-07-09 07:04:36 +00002874PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002875{
2876 register PyStringObject *s = (PyStringObject *)(*p);
2877 PyObject *t;
2878 if (s == NULL || !PyString_Check(s))
2879 Py_FatalError("PyString_InternInPlace: strings only please!");
2880 if ((t = s->ob_sinterned) != NULL) {
2881 if (t == (PyObject *)s)
2882 return;
2883 Py_INCREF(t);
2884 *p = t;
2885 Py_DECREF(s);
2886 return;
2887 }
2888 if (interned == NULL) {
2889 interned = PyDict_New();
2890 if (interned == NULL)
2891 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002892 }
2893 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2894 Py_INCREF(t);
2895 *p = s->ob_sinterned = t;
2896 Py_DECREF(s);
2897 return;
2898 }
2899 t = (PyObject *)s;
2900 if (PyDict_SetItem(interned, t, t) == 0) {
2901 s->ob_sinterned = t;
2902 return;
2903 }
2904 PyErr_Clear();
2905}
2906
2907
2908PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002909PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002910{
2911 PyObject *s = PyString_FromString(cp);
2912 if (s == NULL)
2913 return NULL;
2914 PyString_InternInPlace(&s);
2915 return s;
2916}
2917
2918#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002919
2920void
Fred Drakeba096332000-07-09 07:04:36 +00002921PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00002922{
2923 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002924 for (i = 0; i < UCHAR_MAX + 1; i++) {
2925 Py_XDECREF(characters[i]);
2926 characters[i] = NULL;
2927 }
2928#ifndef DONT_SHARE_SHORT_STRINGS
2929 Py_XDECREF(nullstring);
2930 nullstring = NULL;
2931#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002932#ifdef INTERN_STRINGS
2933 if (interned) {
2934 int pos, changed;
2935 PyObject *key, *value;
2936 do {
2937 changed = 0;
2938 pos = 0;
2939 while (PyDict_Next(interned, &pos, &key, &value)) {
2940 if (key->ob_refcnt == 2 && key == value) {
2941 PyDict_DelItem(interned, key);
2942 changed = 1;
2943 }
2944 }
2945 } while (changed);
2946 }
2947#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002948}