blob: acae88032549493f7d22460822799bd8203b0023 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
#ifdef COUNT_ALLOCS
/* Allocation statistics: each counter is bumped when the shared empty
   string (null_strings) or a shared one-character string (one_strings)
   is handed out instead of allocating a new object. */
int null_strings;
int one_strings;
#endif
11
Guido van Rossum03093a21994-09-28 15:51:32 +000012#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#include <limits.h>
14#else
15#ifndef UCHAR_MAX
16#define UCHAR_MAX 255
17#endif
18#endif
19
Guido van Rossumc0b618a1997-05-02 03:12:38 +000020static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000021#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000022static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000023#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000024
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000040PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000041PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000042{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000043 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000044#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000045 if (size == 0 && (op = nullstring) != NULL) {
46#ifdef COUNT_ALLOCS
47 null_strings++;
48#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000049 Py_INCREF(op);
50 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 if (size == 1 && str != NULL &&
53 (op = characters[*str & UCHAR_MAX]) != NULL)
54 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055#ifdef COUNT_ALLOCS
56 one_strings++;
57#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000058 Py_INCREF(op);
59 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000060 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000061#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000062
63 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000065 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000066 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000068 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef CACHE_HASH
70 op->ob_shash = -1;
71#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000072#ifdef INTERN_STRINGS
73 op->ob_sinterned = NULL;
74#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (str != NULL)
76 memcpy(op->ob_sval, str, size);
77 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000078#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 if (size == 0) {
80 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000081 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 } else if (size == 1 && str != NULL) {
83 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000084 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000086#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000087 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088}
89
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000091PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000092{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000093 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 if (size > INT_MAX) {
96 PyErr_SetString(PyExc_OverflowError,
97 "string is too long for a Python string");
98 return NULL;
99 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000100#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 if (size == 0 && (op = nullstring) != NULL) {
102#ifdef COUNT_ALLOCS
103 null_strings++;
104#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105 Py_INCREF(op);
106 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 }
108 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
109#ifdef COUNT_ALLOCS
110 one_strings++;
111#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000112 Py_INCREF(op);
113 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000115#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000116
117 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000119 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000120 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000122 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123#ifdef CACHE_HASH
124 op->ob_shash = -1;
125#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000126#ifdef INTERN_STRINGS
127 op->ob_sinterned = NULL;
128#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000129 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000130#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
132 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 } else if (size == 1) {
135 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000138#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000139 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000140}
141
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000142PyObject *PyString_Decode(const char *s,
143 int size,
144 const char *encoding,
145 const char *errors)
146{
147 PyObject *buffer = NULL, *str;
148
149 if (encoding == NULL)
150 encoding = PyUnicode_GetDefaultEncoding();
151
152 /* Decode via the codec registry */
153 buffer = PyBuffer_FromMemory((void *)s, size);
154 if (buffer == NULL)
155 goto onError;
156 str = PyCodec_Decode(buffer, encoding, errors);
157 if (str == NULL)
158 goto onError;
159 /* Convert Unicode to a string using the default encoding */
160 if (PyUnicode_Check(str)) {
161 PyObject *temp = str;
162 str = PyUnicode_AsEncodedString(str, NULL, NULL);
163 Py_DECREF(temp);
164 if (str == NULL)
165 goto onError;
166 }
167 if (!PyString_Check(str)) {
168 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000169 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000170 str->ob_type->tp_name);
171 Py_DECREF(str);
172 goto onError;
173 }
174 Py_DECREF(buffer);
175 return str;
176
177 onError:
178 Py_XDECREF(buffer);
179 return NULL;
180}
181
182PyObject *PyString_Encode(const char *s,
183 int size,
184 const char *encoding,
185 const char *errors)
186{
187 PyObject *v, *str;
188
189 str = PyString_FromStringAndSize(s, size);
190 if (str == NULL)
191 return NULL;
192 v = PyString_AsEncodedString(str, encoding, errors);
193 Py_DECREF(str);
194 return v;
195}
196
197PyObject *PyString_AsEncodedString(PyObject *str,
198 const char *encoding,
199 const char *errors)
200{
201 PyObject *v;
202
203 if (!PyString_Check(str)) {
204 PyErr_BadArgument();
205 goto onError;
206 }
207
208 if (encoding == NULL)
209 encoding = PyUnicode_GetDefaultEncoding();
210
211 /* Encode via the codec registry */
212 v = PyCodec_Encode(str, encoding, errors);
213 if (v == NULL)
214 goto onError;
215 /* Convert Unicode to a string using the default encoding */
216 if (PyUnicode_Check(v)) {
217 PyObject *temp = v;
218 v = PyUnicode_AsEncodedString(v, NULL, NULL);
219 Py_DECREF(temp);
220 if (v == NULL)
221 goto onError;
222 }
223 if (!PyString_Check(v)) {
224 PyErr_Format(PyExc_TypeError,
225 "encoder did not return a string object (type=%.400s)",
226 v->ob_type->tp_name);
227 Py_DECREF(v);
228 goto onError;
229 }
230 return v;
231
232 onError:
233 return NULL;
234}
235
Guido van Rossum234f9421993-06-17 12:35:49 +0000236static void
Fred Drakeba096332000-07-09 07:04:36 +0000237string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000238{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000239 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000240}
241
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000242static int
243string_getsize(register PyObject *op)
244{
245 char *s;
246 int len;
247 if (PyString_AsStringAndSize(op, &s, &len))
248 return -1;
249 return len;
250}
251
252static /*const*/ char *
253string_getbuffer(register PyObject *op)
254{
255 char *s;
256 int len;
257 if (PyString_AsStringAndSize(op, &s, &len))
258 return NULL;
259 return s;
260}
261
Guido van Rossumd7047b31995-01-02 19:07:15 +0000262int
Fred Drakeba096332000-07-09 07:04:36 +0000263PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000265 if (!PyString_Check(op))
266 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000267 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268}
269
270/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000271PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000273 if (!PyString_Check(op))
274 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000275 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276}
277
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000278/* Internal API needed by PyString_AsStringAndSize(): */
279extern
280PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
281 const char *errors);
282
283int
284PyString_AsStringAndSize(register PyObject *obj,
285 register char **s,
286 register int *len)
287{
288 if (s == NULL) {
289 PyErr_BadInternalCall();
290 return -1;
291 }
292
293 if (!PyString_Check(obj)) {
294 if (PyUnicode_Check(obj)) {
295 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
296 if (obj == NULL)
297 return -1;
298 }
299 else {
300 PyErr_Format(PyExc_TypeError,
301 "expected string or Unicode object, "
302 "%.200s found", obj->ob_type->tp_name);
303 return -1;
304 }
305 }
306
307 *s = PyString_AS_STRING(obj);
308 if (len != NULL)
309 *len = PyString_GET_SIZE(obj);
310 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
311 PyErr_SetString(PyExc_TypeError,
312 "expected string without null bytes");
313 return -1;
314 }
315 return 0;
316}
317
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318/* Methods */
319
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000320static int
Fred Drakeba096332000-07-09 07:04:36 +0000321string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322{
323 int i;
324 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000325 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000326 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000327 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000328 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000329 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000331
Thomas Wouters7e474022000-07-16 12:04:32 +0000332 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 quote = '\'';
334 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
335 quote = '"';
336
337 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 for (i = 0; i < op->ob_size; i++) {
339 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000340 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000341 fprintf(fp, "\\%c", c);
342 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000345 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000346 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000347 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000348 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349}
350
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000351static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000352string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000353{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000354 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
355 PyObject *v;
356 if (newsize > INT_MAX) {
357 PyErr_SetString(PyExc_OverflowError,
358 "string is too large to make repr");
359 }
360 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000361 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000362 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 }
364 else {
365 register int i;
366 register char c;
367 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000368 int quote;
369
Thomas Wouters7e474022000-07-16 12:04:32 +0000370 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000371 quote = '\'';
372 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
373 quote = '"';
374
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000375 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000376 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000377 for (i = 0; i < op->ob_size; i++) {
378 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000379 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380 *p++ = '\\', *p++ = c;
381 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000382 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000383 while (*p != '\0')
384 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000385 }
386 else
387 *p++ = c;
388 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000389 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000390 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000391 _PyString_Resize(
392 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000393 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000394 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000395}
396
397static int
Fred Drakeba096332000-07-09 07:04:36 +0000398string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000399{
400 return a->ob_size;
401}
402
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000403static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000404string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405{
406 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000407 register PyStringObject *op;
408 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000409 if (PyUnicode_Check(bb))
410 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000411 PyErr_Format(PyExc_TypeError,
412 "cannot add type \"%.200s\" to string",
413 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000414 return NULL;
415 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000416#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000417 /* Optimize cases with empty left or right operand */
418 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000419 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000420 return bb;
421 }
422 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000423 Py_INCREF(a);
424 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000425 }
426 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000427 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000429 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000430 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000431 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000432 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000433#ifdef CACHE_HASH
434 op->ob_shash = -1;
435#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000436#ifdef INTERN_STRINGS
437 op->ob_sinterned = NULL;
438#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000439 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
440 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
441 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000443#undef b
444}
445
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000446static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000447string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000448{
449 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000450 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000451 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000452 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000453 if (n < 0)
454 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000455 /* watch out for overflows: the size can overflow int,
456 * and the # of bytes needed can overflow size_t
457 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000459 if (n && size / n != a->ob_size) {
460 PyErr_SetString(PyExc_OverflowError,
461 "repeated string is too long");
462 return NULL;
463 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000464 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000465 Py_INCREF(a);
466 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 }
Tim Peters8f422462000-09-09 06:13:41 +0000468 nbytes = size * sizeof(char);
469 if (nbytes / sizeof(char) != (size_t)size ||
470 nbytes + sizeof(PyStringObject) <= nbytes) {
471 PyErr_SetString(PyExc_OverflowError,
472 "repeated string is too long");
473 return NULL;
474 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000475 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000476 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000477 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000478 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000479 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000480#ifdef CACHE_HASH
481 op->ob_shash = -1;
482#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000483#ifdef INTERN_STRINGS
484 op->ob_sinterned = NULL;
485#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000486 for (i = 0; i < size; i += a->ob_size)
487 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
488 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000489 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000490}
491
492/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
493
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000494static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000495string_slice(register PyStringObject *a, register int i, register int j)
496 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000497{
498 if (i < 0)
499 i = 0;
500 if (j < 0)
501 j = 0; /* Avoid signed/unsigned bug in next line */
502 if (j > a->ob_size)
503 j = a->ob_size;
504 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000505 Py_INCREF(a);
506 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000507 }
508 if (j < i)
509 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000510 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000511}
512
Guido van Rossum9284a572000-03-07 15:53:43 +0000513static int
Fred Drakeba096332000-07-09 07:04:36 +0000514string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000515{
516 register char *s, *end;
517 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000518 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000519 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000520 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000521 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000522 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000523 return -1;
524 }
525 c = PyString_AsString(el)[0];
526 s = PyString_AsString(a);
527 end = s + PyString_Size(a);
528 while (s < end) {
529 if (c == *s++)
530 return 1;
531 }
532 return 0;
533}
534
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000535static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000536string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000537{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000538 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000539 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000541 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000542 return NULL;
543 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000544 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000545 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000546#ifdef COUNT_ALLOCS
547 if (v != NULL)
548 one_strings++;
549#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000550 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000551 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000552 if (v == NULL)
553 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000554 characters[c] = (PyStringObject *) v;
555 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000556 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000557 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000558 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559}
560
561static int
Fred Drakeba096332000-07-09 07:04:36 +0000562string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000563{
Guido van Rossum253919f1991-02-13 23:18:39 +0000564 int len_a = a->ob_size, len_b = b->ob_size;
565 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000566 int cmp;
567 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000568 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000569 if (cmp == 0)
570 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
571 if (cmp != 0)
572 return cmp;
573 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000574 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000575}
576
Guido van Rossum9bfef441993-03-29 10:43:31 +0000577static long
Fred Drakeba096332000-07-09 07:04:36 +0000578string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000579{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000580 register int len;
581 register unsigned char *p;
582 register long x;
583
584#ifdef CACHE_HASH
585 if (a->ob_shash != -1)
586 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000587#ifdef INTERN_STRINGS
588 if (a->ob_sinterned != NULL)
589 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000590 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000591#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000592#endif
593 len = a->ob_size;
594 p = (unsigned char *) a->ob_sval;
595 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000596 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000597 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000598 x ^= a->ob_size;
599 if (x == -1)
600 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000601#ifdef CACHE_HASH
602 a->ob_shash = x;
603#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000604 return x;
605}
606
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000607static int
Fred Drakeba096332000-07-09 07:04:36 +0000608string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000609{
610 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000611 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000612 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000613 return -1;
614 }
615 *ptr = (void *)self->ob_sval;
616 return self->ob_size;
617}
618
619static int
Fred Drakeba096332000-07-09 07:04:36 +0000620string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000621{
Guido van Rossum045e6881997-09-08 18:30:11 +0000622 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000623 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000624 return -1;
625}
626
627static int
Fred Drakeba096332000-07-09 07:04:36 +0000628string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000629{
630 if ( lenp )
631 *lenp = self->ob_size;
632 return 1;
633}
634
Guido van Rossum1db70701998-10-08 02:18:52 +0000635static int
Fred Drakeba096332000-07-09 07:04:36 +0000636string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000637{
638 if ( index != 0 ) {
639 PyErr_SetString(PyExc_SystemError,
640 "accessing non-existent string segment");
641 return -1;
642 }
643 *ptr = self->ob_sval;
644 return self->ob_size;
645}
646
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000647static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000648 (inquiry)string_length, /*sq_length*/
649 (binaryfunc)string_concat, /*sq_concat*/
650 (intargfunc)string_repeat, /*sq_repeat*/
651 (intargfunc)string_item, /*sq_item*/
652 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000653 0, /*sq_ass_item*/
654 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000655 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000656};
657
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000658static PyBufferProcs string_as_buffer = {
659 (getreadbufferproc)string_buffer_getreadbuf,
660 (getwritebufferproc)string_buffer_getwritebuf,
661 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000662 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000663};
664
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665
666
/* Strip-mode selectors.  NOTE(review): presumably consumed by the strip
   helpers later in the file -- confirm against their callers. */
#define LEFTSTRIP	0
#define RIGHTSTRIP	1
#define BOTHSTRIP	2
670
671
672static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000673split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000674{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000675 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000676 PyObject* item;
677 PyObject *list = PyList_New(0);
678
679 if (list == NULL)
680 return NULL;
681
Guido van Rossum4c08d552000-03-10 22:55:18 +0000682 for (i = j = 0; i < len; ) {
683 while (i < len && isspace(Py_CHARMASK(s[i])))
684 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000685 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000686 while (i < len && !isspace(Py_CHARMASK(s[i])))
687 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000689 if (maxsplit-- <= 0)
690 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000691 item = PyString_FromStringAndSize(s+j, (int)(i-j));
692 if (item == NULL)
693 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000694 err = PyList_Append(list, item);
695 Py_DECREF(item);
696 if (err < 0)
697 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000698 while (i < len && isspace(Py_CHARMASK(s[i])))
699 i++;
700 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000701 }
702 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000703 if (j < len) {
704 item = PyString_FromStringAndSize(s+j, (int)(len - j));
705 if (item == NULL)
706 goto finally;
707 err = PyList_Append(list, item);
708 Py_DECREF(item);
709 if (err < 0)
710 goto finally;
711 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000712 return list;
713 finally:
714 Py_DECREF(list);
715 return NULL;
716}
717
718
/* Docstring for the split() string method. */
static char split__doc__[] =
	"S.split([sep [,maxsplit]]) -> list of strings\n"
	"\n"
	"Return a list of the words in the string S, using sep as the\n"
	"delimiter string. If maxsplit is given, at most maxsplit\n"
	"splits are done. If sep is not specified, any whitespace string\n"
	"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000726
727static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000728string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000729{
730 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000731 int maxsplit = -1;
732 const char *s = PyString_AS_STRING(self), *sub;
733 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000734
Guido van Rossum4c08d552000-03-10 22:55:18 +0000735 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000736 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (maxsplit < 0)
738 maxsplit = INT_MAX;
739 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000740 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000741 if (PyString_Check(subobj)) {
742 sub = PyString_AS_STRING(subobj);
743 n = PyString_GET_SIZE(subobj);
744 }
745 else if (PyUnicode_Check(subobj))
746 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
747 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
748 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000749 if (n == 0) {
750 PyErr_SetString(PyExc_ValueError, "empty separator");
751 return NULL;
752 }
753
754 list = PyList_New(0);
755 if (list == NULL)
756 return NULL;
757
758 i = j = 0;
759 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000760 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000761 if (maxsplit-- <= 0)
762 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000763 item = PyString_FromStringAndSize(s+j, (int)(i-j));
764 if (item == NULL)
765 goto fail;
766 err = PyList_Append(list, item);
767 Py_DECREF(item);
768 if (err < 0)
769 goto fail;
770 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000771 }
772 else
773 i++;
774 }
775 item = PyString_FromStringAndSize(s+j, (int)(len-j));
776 if (item == NULL)
777 goto fail;
778 err = PyList_Append(list, item);
779 Py_DECREF(item);
780 if (err < 0)
781 goto fail;
782
783 return list;
784
785 fail:
786 Py_DECREF(list);
787 return NULL;
788}
789
790
791static char join__doc__[] =
792"S.join(sequence) -> string\n\
793\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000794Return a string which is the concatenation of the strings in the\n\
795sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000796
797static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000798string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000799{
800 char *sep = PyString_AS_STRING(self);
801 int seplen = PyString_GET_SIZE(self);
802 PyObject *res = NULL;
803 int reslen = 0;
804 char *p;
805 int seqlen = 0;
806 int sz = 100;
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000807 int i, slen, sz_incr;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000808 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000809
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000810 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000811 return NULL;
812
Barry Warsaw771d0672000-07-11 04:58:12 +0000813 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000814 if (PyErr_ExceptionMatches(PyExc_TypeError))
815 PyErr_Format(PyExc_TypeError,
816 "sequence expected, %.80s found",
817 orig->ob_type->tp_name);
818 return NULL;
819 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000820 /* From here on out, errors go through finally: for proper
821 * reference count manipulations.
822 */
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000823 seqlen = PySequence_Size(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000824 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000825 item = PySequence_Fast_GET_ITEM(seq, 0);
826 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000827 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000828 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000829 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000830
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000831 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000832 goto finally;
833
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000834 p = PyString_AS_STRING(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000835
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000836 for (i = 0; i < seqlen; i++) {
837 item = PySequence_Fast_GET_ITEM(seq, i);
838 if (!PyString_Check(item)){
839 if (PyUnicode_Check(item)) {
840 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000841 Py_DECREF(seq);
842 return PyUnicode_Join((PyObject *)self, seq);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000843 }
844 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000845 "sequence item %i: expected string,"
846 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000847 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000848 goto finally;
849 }
850 slen = PyString_GET_SIZE(item);
851 while (reslen + slen + seplen >= sz) {
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000852 /* at least double the size of the string */
853 sz_incr = slen + seplen > sz ? slen + seplen : sz;
854 if (_PyString_Resize(&res, sz + sz_incr)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000855 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000856 }
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000857 sz += sz_incr;
858 p = PyString_AS_STRING(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000859 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000860 if (i > 0) {
861 memcpy(p, sep, seplen);
862 p += seplen;
863 reslen += seplen;
864 }
865 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000866 p += slen;
867 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868 }
869 if (_PyString_Resize(&res, reslen))
870 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000871 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000872 return res;
873
874 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000875 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000876 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000877 return NULL;
878}
879
880
881
882static long
Fred Drakeba096332000-07-09 07:04:36 +0000883string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000884{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000885 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000886 int len = PyString_GET_SIZE(self);
887 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000888 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889
Guido van Rossumc6821402000-05-08 14:08:05 +0000890 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
891 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000892 return -2;
893 if (PyString_Check(subobj)) {
894 sub = PyString_AS_STRING(subobj);
895 n = PyString_GET_SIZE(subobj);
896 }
897 else if (PyUnicode_Check(subobj))
898 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
899 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000900 return -2;
901
902 if (last > len)
903 last = len;
904 if (last < 0)
905 last += len;
906 if (last < 0)
907 last = 0;
908 if (i < 0)
909 i += len;
910 if (i < 0)
911 i = 0;
912
Guido van Rossum4c08d552000-03-10 22:55:18 +0000913 if (dir > 0) {
914 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000915 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000916 last -= n;
917 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000918 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000919 return (long)i;
920 }
921 else {
922 int j;
923
924 if (n == 0 && i <= last)
925 return (long)last;
926 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000927 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000928 return (long)j;
929 }
930
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000931 return -1;
932}
933
934
935static char find__doc__[] =
936"S.find(sub [,start [,end]]) -> int\n\
937\n\
938Return the lowest index in S where substring sub is found,\n\
939such that sub is contained within s[start,end]. Optional\n\
940arguments start and end are interpreted as in slice notation.\n\
941\n\
942Return -1 on failure.";
943
944static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000945string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000946{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000947 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000948 if (result == -2)
949 return NULL;
950 return PyInt_FromLong(result);
951}
952
953
954static char index__doc__[] =
955"S.index(sub [,start [,end]]) -> int\n\
956\n\
957Like S.find() but raise ValueError when the substring is not found.";
958
959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000960string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000961{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000962 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000963 if (result == -2)
964 return NULL;
965 if (result == -1) {
966 PyErr_SetString(PyExc_ValueError,
967 "substring not found in string.index");
968 return NULL;
969 }
970 return PyInt_FromLong(result);
971}
972
973
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000974static char rfind__doc__[] =
975"S.rfind(sub [,start [,end]]) -> int\n\
976\n\
977Return the highest index in S where substring sub is found,\n\
978such that sub is contained within s[start,end]. Optional\n\
979arguments start and end are interpreted as in slice notation.\n\
980\n\
981Return -1 on failure.";
982
983static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000984string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000985{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000986 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000987 if (result == -2)
988 return NULL;
989 return PyInt_FromLong(result);
990}
991
992
993static char rindex__doc__[] =
994"S.rindex(sub [,start [,end]]) -> int\n\
995\n\
996Like S.rfind() but raise ValueError when the substring is not found.";
997
998static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000999string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001000{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001001 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001002 if (result == -2)
1003 return NULL;
1004 if (result == -1) {
1005 PyErr_SetString(PyExc_ValueError,
1006 "substring not found in string.rindex");
1007 return NULL;
1008 }
1009 return PyInt_FromLong(result);
1010}
1011
1012
1013static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001014do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001015{
1016 char *s = PyString_AS_STRING(self);
1017 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001018
Guido van Rossum43713e52000-02-29 13:59:29 +00001019 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001020 return NULL;
1021
1022 i = 0;
1023 if (striptype != RIGHTSTRIP) {
1024 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1025 i++;
1026 }
1027 }
1028
1029 j = len;
1030 if (striptype != LEFTSTRIP) {
1031 do {
1032 j--;
1033 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1034 j++;
1035 }
1036
1037 if (i == 0 && j == len) {
1038 Py_INCREF(self);
1039 return (PyObject*)self;
1040 }
1041 else
1042 return PyString_FromStringAndSize(s+i, j-i);
1043}
1044
1045
1046static char strip__doc__[] =
1047"S.strip() -> string\n\
1048\n\
1049Return a copy of the string S with leading and trailing\n\
1050whitespace removed.";
1051
1052static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001053string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054{
1055 return do_strip(self, args, BOTHSTRIP);
1056}
1057
1058
1059static char lstrip__doc__[] =
1060"S.lstrip() -> string\n\
1061\n\
1062Return a copy of the string S with leading whitespace removed.";
1063
1064static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001065string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001066{
1067 return do_strip(self, args, LEFTSTRIP);
1068}
1069
1070
1071static char rstrip__doc__[] =
1072"S.rstrip() -> string\n\
1073\n\
1074Return a copy of the string S with trailing whitespace removed.";
1075
1076static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001077string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001078{
1079 return do_strip(self, args, RIGHTSTRIP);
1080}
1081
1082
1083static char lower__doc__[] =
1084"S.lower() -> string\n\
1085\n\
1086Return a copy of the string S converted to lowercase.";
1087
1088static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001089string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001090{
1091 char *s = PyString_AS_STRING(self), *s_new;
1092 int i, n = PyString_GET_SIZE(self);
1093 PyObject *new;
1094
Guido van Rossum43713e52000-02-29 13:59:29 +00001095 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001096 return NULL;
1097 new = PyString_FromStringAndSize(NULL, n);
1098 if (new == NULL)
1099 return NULL;
1100 s_new = PyString_AsString(new);
1101 for (i = 0; i < n; i++) {
1102 int c = Py_CHARMASK(*s++);
1103 if (isupper(c)) {
1104 *s_new = tolower(c);
1105 } else
1106 *s_new = c;
1107 s_new++;
1108 }
1109 return new;
1110}
1111
1112
1113static char upper__doc__[] =
1114"S.upper() -> string\n\
1115\n\
1116Return a copy of the string S converted to uppercase.";
1117
1118static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001119string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001120{
1121 char *s = PyString_AS_STRING(self), *s_new;
1122 int i, n = PyString_GET_SIZE(self);
1123 PyObject *new;
1124
Guido van Rossum43713e52000-02-29 13:59:29 +00001125 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126 return NULL;
1127 new = PyString_FromStringAndSize(NULL, n);
1128 if (new == NULL)
1129 return NULL;
1130 s_new = PyString_AsString(new);
1131 for (i = 0; i < n; i++) {
1132 int c = Py_CHARMASK(*s++);
1133 if (islower(c)) {
1134 *s_new = toupper(c);
1135 } else
1136 *s_new = c;
1137 s_new++;
1138 }
1139 return new;
1140}
1141
1142
Guido van Rossum4c08d552000-03-10 22:55:18 +00001143static char title__doc__[] =
1144"S.title() -> string\n\
1145\n\
1146Return a titlecased version of S, i.e. words start with uppercase\n\
1147characters, all remaining cased characters have lowercase.";
1148
1149static PyObject*
1150string_title(PyUnicodeObject *self, PyObject *args)
1151{
1152 char *s = PyString_AS_STRING(self), *s_new;
1153 int i, n = PyString_GET_SIZE(self);
1154 int previous_is_cased = 0;
1155 PyObject *new;
1156
1157 if (!PyArg_ParseTuple(args, ":title"))
1158 return NULL;
1159 new = PyString_FromStringAndSize(NULL, n);
1160 if (new == NULL)
1161 return NULL;
1162 s_new = PyString_AsString(new);
1163 for (i = 0; i < n; i++) {
1164 int c = Py_CHARMASK(*s++);
1165 if (islower(c)) {
1166 if (!previous_is_cased)
1167 c = toupper(c);
1168 previous_is_cased = 1;
1169 } else if (isupper(c)) {
1170 if (previous_is_cased)
1171 c = tolower(c);
1172 previous_is_cased = 1;
1173 } else
1174 previous_is_cased = 0;
1175 *s_new++ = c;
1176 }
1177 return new;
1178}
1179
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180static char capitalize__doc__[] =
1181"S.capitalize() -> string\n\
1182\n\
1183Return a copy of the string S with only its first character\n\
1184capitalized.";
1185
1186static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001187string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001188{
1189 char *s = PyString_AS_STRING(self), *s_new;
1190 int i, n = PyString_GET_SIZE(self);
1191 PyObject *new;
1192
Guido van Rossum43713e52000-02-29 13:59:29 +00001193 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001194 return NULL;
1195 new = PyString_FromStringAndSize(NULL, n);
1196 if (new == NULL)
1197 return NULL;
1198 s_new = PyString_AsString(new);
1199 if (0 < n) {
1200 int c = Py_CHARMASK(*s++);
1201 if (islower(c))
1202 *s_new = toupper(c);
1203 else
1204 *s_new = c;
1205 s_new++;
1206 }
1207 for (i = 1; i < n; i++) {
1208 int c = Py_CHARMASK(*s++);
1209 if (isupper(c))
1210 *s_new = tolower(c);
1211 else
1212 *s_new = c;
1213 s_new++;
1214 }
1215 return new;
1216}
1217
1218
1219static char count__doc__[] =
1220"S.count(sub[, start[, end]]) -> int\n\
1221\n\
1222Return the number of occurrences of substring sub in string\n\
1223S[start:end]. Optional arguments start and end are\n\
1224interpreted as in slice notation.";
1225
1226static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001227string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001228{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001229 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230 int len = PyString_GET_SIZE(self), n;
1231 int i = 0, last = INT_MAX;
1232 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001233 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001234
Guido van Rossumc6821402000-05-08 14:08:05 +00001235 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1236 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001237 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001238
Guido van Rossum4c08d552000-03-10 22:55:18 +00001239 if (PyString_Check(subobj)) {
1240 sub = PyString_AS_STRING(subobj);
1241 n = PyString_GET_SIZE(subobj);
1242 }
1243 else if (PyUnicode_Check(subobj))
1244 return PyInt_FromLong(
1245 PyUnicode_Count((PyObject *)self, subobj, i, last));
1246 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1247 return NULL;
1248
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001249 if (last > len)
1250 last = len;
1251 if (last < 0)
1252 last += len;
1253 if (last < 0)
1254 last = 0;
1255 if (i < 0)
1256 i += len;
1257 if (i < 0)
1258 i = 0;
1259 m = last + 1 - n;
1260 if (n == 0)
1261 return PyInt_FromLong((long) (m-i));
1262
1263 r = 0;
1264 while (i < m) {
1265 if (!memcmp(s+i, sub, n)) {
1266 r++;
1267 i += n;
1268 } else {
1269 i++;
1270 }
1271 }
1272 return PyInt_FromLong((long) r);
1273}
1274
1275
1276static char swapcase__doc__[] =
1277"S.swapcase() -> string\n\
1278\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001279Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280converted to lowercase and vice versa.";
1281
1282static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001283string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001284{
1285 char *s = PyString_AS_STRING(self), *s_new;
1286 int i, n = PyString_GET_SIZE(self);
1287 PyObject *new;
1288
Guido van Rossum43713e52000-02-29 13:59:29 +00001289 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 return NULL;
1291 new = PyString_FromStringAndSize(NULL, n);
1292 if (new == NULL)
1293 return NULL;
1294 s_new = PyString_AsString(new);
1295 for (i = 0; i < n; i++) {
1296 int c = Py_CHARMASK(*s++);
1297 if (islower(c)) {
1298 *s_new = toupper(c);
1299 }
1300 else if (isupper(c)) {
1301 *s_new = tolower(c);
1302 }
1303 else
1304 *s_new = c;
1305 s_new++;
1306 }
1307 return new;
1308}
1309
1310
1311static char translate__doc__[] =
1312"S.translate(table [,deletechars]) -> string\n\
1313\n\
1314Return a copy of the string S, where all characters occurring\n\
1315in the optional argument deletechars are removed, and the\n\
1316remaining characters have been mapped through the given\n\
1317translation table, which must be a string of length 256.";
1318
1319static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001320string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 register char *input, *output;
1323 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324 register int i, c, changed = 0;
1325 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001326 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327 int inlen, tablen, dellen = 0;
1328 PyObject *result;
1329 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001331
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 if (!PyArg_ParseTuple(args, "O|O:translate",
1333 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335
1336 if (PyString_Check(tableobj)) {
1337 table1 = PyString_AS_STRING(tableobj);
1338 tablen = PyString_GET_SIZE(tableobj);
1339 }
1340 else if (PyUnicode_Check(tableobj)) {
1341 /* Unicode .translate() does not support the deletechars
1342 parameter; instead a mapping to None will cause characters
1343 to be deleted. */
1344 if (delobj != NULL) {
1345 PyErr_SetString(PyExc_TypeError,
1346 "deletions are implemented differently for unicode");
1347 return NULL;
1348 }
1349 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1350 }
1351 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353
1354 if (delobj != NULL) {
1355 if (PyString_Check(delobj)) {
1356 del_table = PyString_AS_STRING(delobj);
1357 dellen = PyString_GET_SIZE(delobj);
1358 }
1359 else if (PyUnicode_Check(delobj)) {
1360 PyErr_SetString(PyExc_TypeError,
1361 "deletions are implemented differently for unicode");
1362 return NULL;
1363 }
1364 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1365 return NULL;
1366
1367 if (tablen != 256) {
1368 PyErr_SetString(PyExc_ValueError,
1369 "translation table must be 256 characters long");
1370 return NULL;
1371 }
1372 }
1373 else {
1374 del_table = NULL;
1375 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376 }
1377
1378 table = table1;
1379 inlen = PyString_Size(input_obj);
1380 result = PyString_FromStringAndSize((char *)NULL, inlen);
1381 if (result == NULL)
1382 return NULL;
1383 output_start = output = PyString_AsString(result);
1384 input = PyString_AsString(input_obj);
1385
1386 if (dellen == 0) {
1387 /* If no deletions are required, use faster code */
1388 for (i = inlen; --i >= 0; ) {
1389 c = Py_CHARMASK(*input++);
1390 if (Py_CHARMASK((*output++ = table[c])) != c)
1391 changed = 1;
1392 }
1393 if (changed)
1394 return result;
1395 Py_DECREF(result);
1396 Py_INCREF(input_obj);
1397 return input_obj;
1398 }
1399
1400 for (i = 0; i < 256; i++)
1401 trans_table[i] = Py_CHARMASK(table[i]);
1402
1403 for (i = 0; i < dellen; i++)
1404 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1405
1406 for (i = inlen; --i >= 0; ) {
1407 c = Py_CHARMASK(*input++);
1408 if (trans_table[c] != -1)
1409 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1410 continue;
1411 changed = 1;
1412 }
1413 if (!changed) {
1414 Py_DECREF(result);
1415 Py_INCREF(input_obj);
1416 return input_obj;
1417 }
1418 /* Fix the size of the resulting string */
1419 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1420 return NULL;
1421 return result;
1422}
1423
1424
1425/* What follows is used for implementing replace(). Perry Stoll. */
1426
/*
  mymemfind

  A strstr() work-alike for arbitrary (not NUL-terminated) memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its index in MEM, or -1 when PAT does not
  occur.  A pattern longer than MEM is never found, so -1 is returned
  in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot begin in the final pat_len-1 bytes. */
    int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
1452
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right and resuming after each match:
      mem=1111  and pat=11 gives 2,
      mem=11111 and pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int at;
    int consumed;

    while (len >= 0) {
        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* Resume the search just past this match. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1476
/*
  mymemreplace

  Return a string in which occurrences of PAT in memory STR are
  replaced with SUB.  COUNT limits the number of replacements; a
  negative COUNT means "replace all".

  If STR is empty, PAT is longer than STR, or no replacement is to be
  made, the original string is returned.  Otherwise a new buffer is
  allocated here with PyMem_MALLOC and returned; the caller releases it.

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs (no memory).

  return value is:
      the new string allocated locally, or
      NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result can be empty (e.g. the whole string replaced by "").
       A zero-byte allocation may legitimately return NULL, which would
       be mistaken for an out-of-memory error, so always request at
       least one byte for the caller to free. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;  /* have to cast away constness here */
}
1554
1555
1556static char replace__doc__[] =
1557"S.replace (old, new[, maxsplit]) -> string\n\
1558\n\
1559Return a copy of string S with all occurrences of substring\n\
1560old replaced by new. If the optional argument maxsplit is\n\
1561given, only the first maxsplit occurrences are replaced.";
1562
1563static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001564string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001566 const char *str = PyString_AS_STRING(self), *sub, *repl;
1567 char *new_s;
1568 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1569 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572
Guido van Rossum4c08d552000-03-10 22:55:18 +00001573 if (!PyArg_ParseTuple(args, "OO|i:replace",
1574 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001576
1577 if (PyString_Check(subobj)) {
1578 sub = PyString_AS_STRING(subobj);
1579 sub_len = PyString_GET_SIZE(subobj);
1580 }
1581 else if (PyUnicode_Check(subobj))
1582 return PyUnicode_Replace((PyObject *)self,
1583 subobj, replobj, count);
1584 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1585 return NULL;
1586
1587 if (PyString_Check(replobj)) {
1588 repl = PyString_AS_STRING(replobj);
1589 repl_len = PyString_GET_SIZE(replobj);
1590 }
1591 else if (PyUnicode_Check(replobj))
1592 return PyUnicode_Replace((PyObject *)self,
1593 subobj, replobj, count);
1594 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1595 return NULL;
1596
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001597 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001598 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 return NULL;
1600 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001601 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 if (new_s == NULL) {
1603 PyErr_NoMemory();
1604 return NULL;
1605 }
1606 if (out_len == -1) {
1607 /* we're returning another reference to self */
1608 new = (PyObject*)self;
1609 Py_INCREF(new);
1610 }
1611 else {
1612 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001613 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 }
1615 return new;
1616}
1617
1618
1619static char startswith__doc__[] =
1620"S.startswith(prefix[, start[, end]]) -> int\n\
1621\n\
1622Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1623optional start, test S beginning at that position. With optional end, stop\n\
1624comparing S at that position.";
1625
1626static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001627string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001629 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 int plen;
1633 int start = 0;
1634 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001635 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636
Guido van Rossumc6821402000-05-08 14:08:05 +00001637 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1638 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001639 return NULL;
1640 if (PyString_Check(subobj)) {
1641 prefix = PyString_AS_STRING(subobj);
1642 plen = PyString_GET_SIZE(subobj);
1643 }
1644 else if (PyUnicode_Check(subobj))
1645 return PyInt_FromLong(
1646 PyUnicode_Tailmatch((PyObject *)self,
1647 subobj, start, end, -1));
1648 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001649 return NULL;
1650
1651 /* adopt Java semantics for index out of range. it is legal for
1652 * offset to be == plen, but this only returns true if prefix is
1653 * the empty string.
1654 */
1655 if (start < 0 || start+plen > len)
1656 return PyInt_FromLong(0);
1657
1658 if (!memcmp(str+start, prefix, plen)) {
1659 /* did the match end after the specified end? */
1660 if (end < 0)
1661 return PyInt_FromLong(1);
1662 else if (end - start < plen)
1663 return PyInt_FromLong(0);
1664 else
1665 return PyInt_FromLong(1);
1666 }
1667 else return PyInt_FromLong(0);
1668}
1669
1670
1671static char endswith__doc__[] =
1672"S.endswith(suffix[, start[, end]]) -> int\n\
1673\n\
1674Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1675optional start, test S beginning at that position. With optional end, stop\n\
1676comparing S at that position.";
1677
1678static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001679string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001680{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 const char* suffix;
1684 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001685 int start = 0;
1686 int end = -1;
1687 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001688 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689
Guido van Rossumc6821402000-05-08 14:08:05 +00001690 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1691 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001692 return NULL;
1693 if (PyString_Check(subobj)) {
1694 suffix = PyString_AS_STRING(subobj);
1695 slen = PyString_GET_SIZE(subobj);
1696 }
1697 else if (PyUnicode_Check(subobj))
1698 return PyInt_FromLong(
1699 PyUnicode_Tailmatch((PyObject *)self,
1700 subobj, start, end, +1));
1701 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702 return NULL;
1703
Guido van Rossum4c08d552000-03-10 22:55:18 +00001704 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705 return PyInt_FromLong(0);
1706
1707 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001708 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709
Guido van Rossum4c08d552000-03-10 22:55:18 +00001710 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711 return PyInt_FromLong(1);
1712 else return PyInt_FromLong(0);
1713}
1714
1715
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001716static char encode__doc__[] =
1717"S.encode([encoding[,errors]]) -> string\n\
1718\n\
1719Return an encoded string version of S. Default encoding is the current\n\
1720default string encoding. errors may be given to set a different error\n\
1721handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1722a ValueError. Other possible values are 'ignore' and 'replace'.";
1723
1724static PyObject *
1725string_encode(PyStringObject *self, PyObject *args)
1726{
1727 char *encoding = NULL;
1728 char *errors = NULL;
1729 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1730 return NULL;
1731 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1732}
1733
1734
Guido van Rossum4c08d552000-03-10 22:55:18 +00001735static char expandtabs__doc__[] =
1736"S.expandtabs([tabsize]) -> string\n\
1737\n\
1738Return a copy of S where all tab characters are expanded using spaces.\n\
1739If tabsize is not given, a tab size of 8 characters is assumed.";
1740
1741static PyObject*
1742string_expandtabs(PyStringObject *self, PyObject *args)
1743{
1744 const char *e, *p;
1745 char *q;
1746 int i, j;
1747 PyObject *u;
1748 int tabsize = 8;
1749
1750 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1751 return NULL;
1752
Thomas Wouters7e474022000-07-16 12:04:32 +00001753 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001754 i = j = 0;
1755 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1756 for (p = PyString_AS_STRING(self); p < e; p++)
1757 if (*p == '\t') {
1758 if (tabsize > 0)
1759 j += tabsize - (j % tabsize);
1760 }
1761 else {
1762 j++;
1763 if (*p == '\n' || *p == '\r') {
1764 i += j;
1765 j = 0;
1766 }
1767 }
1768
1769 /* Second pass: create output string and fill it */
1770 u = PyString_FromStringAndSize(NULL, i + j);
1771 if (!u)
1772 return NULL;
1773
1774 j = 0;
1775 q = PyString_AS_STRING(u);
1776
1777 for (p = PyString_AS_STRING(self); p < e; p++)
1778 if (*p == '\t') {
1779 if (tabsize > 0) {
1780 i = tabsize - (j % tabsize);
1781 j += i;
1782 while (i--)
1783 *q++ = ' ';
1784 }
1785 }
1786 else {
1787 j++;
1788 *q++ = *p;
1789 if (*p == '\n' || *p == '\r')
1790 j = 0;
1791 }
1792
1793 return u;
1794}
1795
1796static
1797PyObject *pad(PyStringObject *self,
1798 int left,
1799 int right,
1800 char fill)
1801{
1802 PyObject *u;
1803
1804 if (left < 0)
1805 left = 0;
1806 if (right < 0)
1807 right = 0;
1808
1809 if (left == 0 && right == 0) {
1810 Py_INCREF(self);
1811 return (PyObject *)self;
1812 }
1813
1814 u = PyString_FromStringAndSize(NULL,
1815 left + PyString_GET_SIZE(self) + right);
1816 if (u) {
1817 if (left)
1818 memset(PyString_AS_STRING(u), fill, left);
1819 memcpy(PyString_AS_STRING(u) + left,
1820 PyString_AS_STRING(self),
1821 PyString_GET_SIZE(self));
1822 if (right)
1823 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1824 fill, right);
1825 }
1826
1827 return u;
1828}
1829
1830static char ljust__doc__[] =
1831"S.ljust(width) -> string\n\
1832\n\
1833Return S left justified in a string of length width. Padding is\n\
1834done using spaces.";
1835
1836static PyObject *
1837string_ljust(PyStringObject *self, PyObject *args)
1838{
1839 int width;
1840 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1841 return NULL;
1842
1843 if (PyString_GET_SIZE(self) >= width) {
1844 Py_INCREF(self);
1845 return (PyObject*) self;
1846 }
1847
1848 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1849}
1850
1851
1852static char rjust__doc__[] =
1853"S.rjust(width) -> string\n\
1854\n\
1855Return S right justified in a string of length width. Padding is\n\
1856done using spaces.";
1857
1858static PyObject *
1859string_rjust(PyStringObject *self, PyObject *args)
1860{
1861 int width;
1862 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1863 return NULL;
1864
1865 if (PyString_GET_SIZE(self) >= width) {
1866 Py_INCREF(self);
1867 return (PyObject*) self;
1868 }
1869
1870 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1871}
1872
1873
1874static char center__doc__[] =
1875"S.center(width) -> string\n\
1876\n\
1877Return S centered in a string of length width. Padding is done\n\
1878using spaces.";
1879
1880static PyObject *
1881string_center(PyStringObject *self, PyObject *args)
1882{
1883 int marg, left;
1884 int width;
1885
1886 if (!PyArg_ParseTuple(args, "i:center", &width))
1887 return NULL;
1888
1889 if (PyString_GET_SIZE(self) >= width) {
1890 Py_INCREF(self);
1891 return (PyObject*) self;
1892 }
1893
1894 marg = width - PyString_GET_SIZE(self);
1895 left = marg / 2 + (marg & width & 1);
1896
1897 return pad(self, left, marg - left, ' ');
1898}
1899
#if 0
static char zfill__doc__[] =
    "S.zfill(width) -> string\n"
    "\n"
    "Pad a numeric string x with zeros on the left, to fill a field\n"
    "of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).
 * Left-pads with '0' characters via pad(); a leading '+' or '-' of the
 * original string is moved to the front of the padded result.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int zeros;
    PyObject *padded;
    char *buf;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (!(PyString_GET_SIZE(self) < width)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    zeros = width - PyString_GET_SIZE(self);

    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* buf[zeros] is the first character of the original string */
    buf = PyString_AS_STRING(padded);
    if (buf[zeros] == '+' || buf[zeros] == '-') {
        /* move sign to beginning of string */
        buf[0] = buf[zeros];
        buf[zeros] = '0';
    }

    return padded;
}
#endif
1939
1940static char isspace__doc__[] =
1941"S.isspace() -> int\n\
1942\n\
1943Return 1 if there are only whitespace characters in S,\n\
19440 otherwise.";
1945
1946static PyObject*
1947string_isspace(PyStringObject *self, PyObject *args)
1948{
Fred Drakeba096332000-07-09 07:04:36 +00001949 register const unsigned char *p
1950 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001951 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001952
1953 if (!PyArg_NoArgs(args))
1954 return NULL;
1955
1956 /* Shortcut for single character strings */
1957 if (PyString_GET_SIZE(self) == 1 &&
1958 isspace(*p))
1959 return PyInt_FromLong(1);
1960
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001961 /* Special case for empty strings */
1962 if (PyString_GET_SIZE(self) == 0)
1963 return PyInt_FromLong(0);
1964
Guido van Rossum4c08d552000-03-10 22:55:18 +00001965 e = p + PyString_GET_SIZE(self);
1966 for (; p < e; p++) {
1967 if (!isspace(*p))
1968 return PyInt_FromLong(0);
1969 }
1970 return PyInt_FromLong(1);
1971}
1972
1973
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001974static char isalpha__doc__[] =
1975"S.isalpha() -> int\n\
1976\n\
1977Return 1 if all characters in S are alphabetic\n\
1978and there is at least one character in S, 0 otherwise.";
1979
1980static PyObject*
1981string_isalpha(PyUnicodeObject *self, PyObject *args)
1982{
Fred Drakeba096332000-07-09 07:04:36 +00001983 register const unsigned char *p
1984 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001985 register const unsigned char *e;
1986
1987 if (!PyArg_NoArgs(args))
1988 return NULL;
1989
1990 /* Shortcut for single character strings */
1991 if (PyString_GET_SIZE(self) == 1 &&
1992 isalpha(*p))
1993 return PyInt_FromLong(1);
1994
1995 /* Special case for empty strings */
1996 if (PyString_GET_SIZE(self) == 0)
1997 return PyInt_FromLong(0);
1998
1999 e = p + PyString_GET_SIZE(self);
2000 for (; p < e; p++) {
2001 if (!isalpha(*p))
2002 return PyInt_FromLong(0);
2003 }
2004 return PyInt_FromLong(1);
2005}
2006
2007
2008static char isalnum__doc__[] =
2009"S.isalnum() -> int\n\
2010\n\
2011Return 1 if all characters in S are alphanumeric\n\
2012and there is at least one character in S, 0 otherwise.";
2013
2014static PyObject*
2015string_isalnum(PyUnicodeObject *self, PyObject *args)
2016{
Fred Drakeba096332000-07-09 07:04:36 +00002017 register const unsigned char *p
2018 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002019 register const unsigned char *e;
2020
2021 if (!PyArg_NoArgs(args))
2022 return NULL;
2023
2024 /* Shortcut for single character strings */
2025 if (PyString_GET_SIZE(self) == 1 &&
2026 isalnum(*p))
2027 return PyInt_FromLong(1);
2028
2029 /* Special case for empty strings */
2030 if (PyString_GET_SIZE(self) == 0)
2031 return PyInt_FromLong(0);
2032
2033 e = p + PyString_GET_SIZE(self);
2034 for (; p < e; p++) {
2035 if (!isalnum(*p))
2036 return PyInt_FromLong(0);
2037 }
2038 return PyInt_FromLong(1);
2039}
2040
2041
Guido van Rossum4c08d552000-03-10 22:55:18 +00002042static char isdigit__doc__[] =
2043"S.isdigit() -> int\n\
2044\n\
2045Return 1 if there are only digit characters in S,\n\
20460 otherwise.";
2047
2048static PyObject*
2049string_isdigit(PyStringObject *self, PyObject *args)
2050{
Fred Drakeba096332000-07-09 07:04:36 +00002051 register const unsigned char *p
2052 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002053 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002054
2055 if (!PyArg_NoArgs(args))
2056 return NULL;
2057
2058 /* Shortcut for single character strings */
2059 if (PyString_GET_SIZE(self) == 1 &&
2060 isdigit(*p))
2061 return PyInt_FromLong(1);
2062
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002063 /* Special case for empty strings */
2064 if (PyString_GET_SIZE(self) == 0)
2065 return PyInt_FromLong(0);
2066
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067 e = p + PyString_GET_SIZE(self);
2068 for (; p < e; p++) {
2069 if (!isdigit(*p))
2070 return PyInt_FromLong(0);
2071 }
2072 return PyInt_FromLong(1);
2073}
2074
2075
2076static char islower__doc__[] =
2077"S.islower() -> int\n\
2078\n\
2079Return 1 if all cased characters in S are lowercase and there is\n\
2080at least one cased character in S, 0 otherwise.";
2081
2082static PyObject*
2083string_islower(PyStringObject *self, PyObject *args)
2084{
Fred Drakeba096332000-07-09 07:04:36 +00002085 register const unsigned char *p
2086 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002087 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088 int cased;
2089
2090 if (!PyArg_NoArgs(args))
2091 return NULL;
2092
2093 /* Shortcut for single character strings */
2094 if (PyString_GET_SIZE(self) == 1)
2095 return PyInt_FromLong(islower(*p) != 0);
2096
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002097 /* Special case for empty strings */
2098 if (PyString_GET_SIZE(self) == 0)
2099 return PyInt_FromLong(0);
2100
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101 e = p + PyString_GET_SIZE(self);
2102 cased = 0;
2103 for (; p < e; p++) {
2104 if (isupper(*p))
2105 return PyInt_FromLong(0);
2106 else if (!cased && islower(*p))
2107 cased = 1;
2108 }
2109 return PyInt_FromLong(cased);
2110}
2111
2112
2113static char isupper__doc__[] =
2114"S.isupper() -> int\n\
2115\n\
2116Return 1 if all cased characters in S are uppercase and there is\n\
2117at least one cased character in S, 0 otherwise.";
2118
2119static PyObject*
2120string_isupper(PyStringObject *self, PyObject *args)
2121{
Fred Drakeba096332000-07-09 07:04:36 +00002122 register const unsigned char *p
2123 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002124 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002125 int cased;
2126
2127 if (!PyArg_NoArgs(args))
2128 return NULL;
2129
2130 /* Shortcut for single character strings */
2131 if (PyString_GET_SIZE(self) == 1)
2132 return PyInt_FromLong(isupper(*p) != 0);
2133
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002134 /* Special case for empty strings */
2135 if (PyString_GET_SIZE(self) == 0)
2136 return PyInt_FromLong(0);
2137
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 e = p + PyString_GET_SIZE(self);
2139 cased = 0;
2140 for (; p < e; p++) {
2141 if (islower(*p))
2142 return PyInt_FromLong(0);
2143 else if (!cased && isupper(*p))
2144 cased = 1;
2145 }
2146 return PyInt_FromLong(cased);
2147}
2148
2149
2150static char istitle__doc__[] =
2151"S.istitle() -> int\n\
2152\n\
2153Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2154may only follow uncased characters and lowercase characters only cased\n\
2155ones. Return 0 otherwise.";
2156
2157static PyObject*
2158string_istitle(PyStringObject *self, PyObject *args)
2159{
Fred Drakeba096332000-07-09 07:04:36 +00002160 register const unsigned char *p
2161 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002162 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002163 int cased, previous_is_cased;
2164
2165 if (!PyArg_NoArgs(args))
2166 return NULL;
2167
2168 /* Shortcut for single character strings */
2169 if (PyString_GET_SIZE(self) == 1)
2170 return PyInt_FromLong(isupper(*p) != 0);
2171
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002172 /* Special case for empty strings */
2173 if (PyString_GET_SIZE(self) == 0)
2174 return PyInt_FromLong(0);
2175
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 e = p + PyString_GET_SIZE(self);
2177 cased = 0;
2178 previous_is_cased = 0;
2179 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002180 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002181
2182 if (isupper(ch)) {
2183 if (previous_is_cased)
2184 return PyInt_FromLong(0);
2185 previous_is_cased = 1;
2186 cased = 1;
2187 }
2188 else if (islower(ch)) {
2189 if (!previous_is_cased)
2190 return PyInt_FromLong(0);
2191 previous_is_cased = 1;
2192 cased = 1;
2193 }
2194 else
2195 previous_is_cased = 0;
2196 }
2197 return PyInt_FromLong(cased);
2198}
2199
2200
2201static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002202"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203\n\
2204Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002205Line breaks are not included in the resulting list unless keepends\n\
2206is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207
2208#define SPLIT_APPEND(data, left, right) \
2209 str = PyString_FromStringAndSize(data + left, right - left); \
2210 if (!str) \
2211 goto onError; \
2212 if (PyList_Append(list, str)) { \
2213 Py_DECREF(str); \
2214 goto onError; \
2215 } \
2216 else \
2217 Py_DECREF(str);
2218
2219static PyObject*
2220string_splitlines(PyStringObject *self, PyObject *args)
2221{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222 register int i;
2223 register int j;
2224 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002225 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 PyObject *list;
2227 PyObject *str;
2228 char *data;
2229
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002230 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002231 return NULL;
2232
2233 data = PyString_AS_STRING(self);
2234 len = PyString_GET_SIZE(self);
2235
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 list = PyList_New(0);
2237 if (!list)
2238 goto onError;
2239
2240 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002241 int eol;
2242
Guido van Rossum4c08d552000-03-10 22:55:18 +00002243 /* Find a line and append it */
2244 while (i < len && data[i] != '\n' && data[i] != '\r')
2245 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246
2247 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002248 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249 if (i < len) {
2250 if (data[i] == '\r' && i + 1 < len &&
2251 data[i+1] == '\n')
2252 i += 2;
2253 else
2254 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002255 if (keepends)
2256 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002258 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 j = i;
2260 }
2261 if (j < len) {
2262 SPLIT_APPEND(data, j, len);
2263 }
2264
2265 return list;
2266
2267 onError:
2268 Py_DECREF(list);
2269 return NULL;
2270}
2271
2272#undef SPLIT_APPEND
2273
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274
2275static PyMethodDef
2276string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002277 /* Counterparts of the obsolete stropmodule functions; except
2278 string.maketrans(). */
2279 {"join", (PyCFunction)string_join, 1, join__doc__},
2280 {"split", (PyCFunction)string_split, 1, split__doc__},
2281 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2282 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2283 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2284 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2285 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2286 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2287 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002288 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2289 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2291 {"count", (PyCFunction)string_count, 1, count__doc__},
2292 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2293 {"find", (PyCFunction)string_find, 1, find__doc__},
2294 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002295 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2297 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2298 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2299 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2301 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2302 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2304 {"title", (PyCFunction)string_title, 1, title__doc__},
2305 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2306 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2307 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002308 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2310 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2311#if 0
2312 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2313#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314 {NULL, NULL} /* sentinel */
2315};
2316
2317static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002318string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319{
2320 return Py_FindMethod(string_methods, (PyObject*)s, name);
2321}
2322
2323
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002324PyTypeObject PyString_Type = {
2325 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002326 0,
2327 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002328 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002329 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002330 (destructor)string_dealloc, /*tp_dealloc*/
2331 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002333 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002334 (cmpfunc)string_compare, /*tp_compare*/
2335 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002336 0, /*tp_as_number*/
2337 &string_as_sequence, /*tp_as_sequence*/
2338 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002339 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002340 0, /*tp_call*/
2341 0, /*tp_str*/
2342 0, /*tp_getattro*/
2343 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002344 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002345 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002346 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002347};
2348
2349void
Fred Drakeba096332000-07-09 07:04:36 +00002350PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002351{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002352 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002353 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002354 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002355 if (w == NULL || !PyString_Check(*pv)) {
2356 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002357 *pv = NULL;
2358 return;
2359 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002360 v = string_concat((PyStringObject *) *pv, w);
2361 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002362 *pv = v;
2363}
2364
Guido van Rossum013142a1994-08-30 08:19:36 +00002365void
Fred Drakeba096332000-07-09 07:04:36 +00002366PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002367{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002368 PyString_Concat(pv, w);
2369 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002370}
2371
2372
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002373/* The following function breaks the notion that strings are immutable:
2374 it changes the size of a string. We get away with this only if there
2375 is only one module referencing the object. You can also think of it
2376 as creating a new string object and destroying the old one, only
2377 more efficiently. In any case, don't use this if the string may
2378 already be known to some other part of the code... */
2379
2380int
Fred Drakeba096332000-07-09 07:04:36 +00002381_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002382{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002383 register PyObject *v;
2384 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002385 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002386 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002387 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002388 Py_DECREF(v);
2389 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002390 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002391 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002392 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002393#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002394 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002395#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002396 _Py_ForgetReference(v);
2397 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002398 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002399 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002400 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002401 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002402 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002403 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002404 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002405 _Py_NewReference(*pv);
2406 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002407 sv->ob_size = newsize;
2408 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002409 return 0;
2410}
Guido van Rossume5372401993-03-16 12:15:04 +00002411
2412/* Helpers for formatstring */
2413
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002414static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002415getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002416{
2417 int argidx = *p_argidx;
2418 if (argidx < arglen) {
2419 (*p_argidx)++;
2420 if (arglen < 0)
2421 return args;
2422 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002423 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002424 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002425 PyErr_SetString(PyExc_TypeError,
2426 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002427 return NULL;
2428}
2429
/* Format flag bits, one per flag character of a format specification:
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 * Each flag occupies its own bit so that flags can be OR-ed together.
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2442
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002443static int
Fred Drakeba096332000-07-09 07:04:36 +00002444formatfloat(char *buf, size_t buflen, int flags,
2445 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002446{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002447 /* fmt = '%#.' + `prec` + `type`
2448 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002449 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002450 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002451 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002452 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002453 if (prec < 0)
2454 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002455 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2456 type = 'g';
2457 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002458 /* worst case length calc to ensure no buffer overrun:
2459 fmt = %#.<prec>g
2460 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2461 for any double rep.)
2462 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2463 If prec=0 the effective precision is 1 (the leading digit is
2464 always given), therefore increase by one to 10+prec. */
2465 if (buflen <= (size_t)10 + (size_t)prec) {
2466 PyErr_SetString(PyExc_OverflowError,
2467 "formatted float is too long (precision too long?)");
2468 return -1;
2469 }
Guido van Rossume5372401993-03-16 12:15:04 +00002470 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002471 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002472}
2473
Tim Peters38fd5b62000-09-21 05:43:11 +00002474/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2475 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2476 * Python's regular ints.
2477 * Return value: a new PyString*, or NULL if error.
2478 * . *pbuf is set to point into it,
2479 * *plen set to the # of chars following that.
2480 * Caller must decref it when done using pbuf.
2481 * The string starting at *pbuf is of the form
2482 * "-"? ("0x" | "0X")? digit+
2483 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2484 * set in flags. The case of hex digits will be correct,
2485 * There will be at least prec digits, zero-filled on the left if
2486 * necessary to get that many.
2487 * val object to be converted
2488 * flags bitmask of format flags; only F_ALT is looked at
2489 * prec minimum number of digits; 0-fill on left if needed
2490 * type a character in [duoxX]; u acts the same as d
2491 *
2492 * CAUTION: o, x and X conversions on regular ints can never
2493 * produce a '-' sign, but can for Python's unbounded ints.
2494 */
2495PyObject*
2496_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2497 char **pbuf, int *plen)
2498{
2499 PyObject *result = NULL;
2500 char *buf;
2501 int i;
2502 int sign; /* 1 if '-', else 0 */
2503 int len; /* number of characters */
2504 int numdigits; /* len == numnondigits + numdigits */
2505 int numnondigits = 0;
2506
2507 switch (type) {
2508 case 'd':
2509 case 'u':
2510 result = val->ob_type->tp_str(val);
2511 break;
2512 case 'o':
2513 result = val->ob_type->tp_as_number->nb_oct(val);
2514 break;
2515 case 'x':
2516 case 'X':
2517 numnondigits = 2;
2518 result = val->ob_type->tp_as_number->nb_hex(val);
2519 break;
2520 default:
2521 assert(!"'type' not in [duoxX]");
2522 }
2523 if (!result)
2524 return NULL;
2525
2526 /* To modify the string in-place, there can only be one reference. */
2527 if (result->ob_refcnt != 1) {
2528 PyErr_BadInternalCall();
2529 return NULL;
2530 }
2531 buf = PyString_AsString(result);
2532 len = PyString_Size(result);
2533 if (buf[len-1] == 'L') {
2534 --len;
2535 buf[len] = '\0';
2536 }
2537 sign = buf[0] == '-';
2538 numnondigits += sign;
2539 numdigits = len - numnondigits;
2540 assert(numdigits > 0);
2541
2542 /* Get rid of base marker unless F_ALT */
2543 if ((flags & F_ALT) == 0) {
2544 /* Need to skip 0x, 0X or 0. */
2545 int skipped = 0;
2546 switch (type) {
2547 case 'o':
2548 assert(buf[sign] == '0');
2549 /* If 0 is only digit, leave it alone. */
2550 if (numdigits > 1) {
2551 skipped = 1;
2552 --numdigits;
2553 }
2554 break;
2555 case 'x':
2556 case 'X':
2557 assert(buf[sign] == '0');
2558 assert(buf[sign + 1] == 'x');
2559 skipped = 2;
2560 numnondigits -= 2;
2561 break;
2562 }
2563 if (skipped) {
2564 buf += skipped;
2565 len -= skipped;
2566 if (sign)
2567 buf[0] = '-';
2568 }
2569 assert(len == numnondigits + numdigits);
2570 assert(numdigits > 0);
2571 }
2572
2573 /* Fill with leading zeroes to meet minimum width. */
2574 if (prec > numdigits) {
2575 PyObject *r1 = PyString_FromStringAndSize(NULL,
2576 numnondigits + prec);
2577 char *b1;
2578 if (!r1) {
2579 Py_DECREF(result);
2580 return NULL;
2581 }
2582 b1 = PyString_AS_STRING(r1);
2583 for (i = 0; i < numnondigits; ++i)
2584 *b1++ = *buf++;
2585 for (i = 0; i < prec - numdigits; i++)
2586 *b1++ = '0';
2587 for (i = 0; i < numdigits; i++)
2588 *b1++ = *buf++;
2589 *b1 = '\0';
2590 Py_DECREF(result);
2591 result = r1;
2592 buf = PyString_AS_STRING(result);
2593 len = numnondigits + prec;
2594 }
2595
2596 /* Fix up case for hex conversions. */
2597 switch (type) {
2598 case 'x':
2599 /* Need to convert all upper case letters to lower case. */
2600 for (i = 0; i < len; i++)
2601 if (buf[i] >= 'A' && buf[i] <= 'F')
2602 buf[i] += 'a'-'A';
2603 break;
2604 case 'X':
2605 /* Need to convert 0x to 0X (and -0x to -0X). */
2606 if (buf[sign + 1] == 'x')
2607 buf[sign + 1] = 'X';
2608 break;
2609 }
2610 *pbuf = buf;
2611 *plen = len;
2612 return result;
2613}
2614
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002615static int
Fred Drakeba096332000-07-09 07:04:36 +00002616formatint(char *buf, size_t buflen, int flags,
2617 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002618{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002619 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002620 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2621 + 1 + 1 = 24 */
2622 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002623 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002624 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002625 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002626 if (prec < 0)
2627 prec = 1;
2628 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002629 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002630 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002631 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002632 PyErr_SetString(PyExc_OverflowError,
2633 "formatted integer is too long (precision too long?)");
2634 return -1;
2635 }
Guido van Rossume5372401993-03-16 12:15:04 +00002636 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002637 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002638}
2639
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002640static int
Fred Drakeba096332000-07-09 07:04:36 +00002641formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002642{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002643 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002644 if (PyString_Check(v)) {
2645 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002646 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002647 }
2648 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002649 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002650 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002651 }
2652 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002653 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002654}
2655
Guido van Rossum013142a1994-08-30 08:19:36 +00002656
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002657/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2658
2659 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2660 chars are formatted. XXX This is a magic number. Each formatting
2661 routine does bounds checking to ensure no overflow, but a better
2662 solution may be to malloc a buffer of appropriate size for each
2663 format. For now, the current solution is sufficient.
2664*/
2665#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00002666
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002667PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002668PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002669{
2670 char *fmt, *res;
2671 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002672 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002673 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002674 PyObject *dict = NULL;
2675 if (format == NULL || !PyString_Check(format) || args == NULL) {
2676 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002677 return NULL;
2678 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002679 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002680 fmt = PyString_AsString(format);
2681 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002682 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002683 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002684 if (result == NULL)
2685 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002686 res = PyString_AsString(result);
2687 if (PyTuple_Check(args)) {
2688 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002689 argidx = 0;
2690 }
2691 else {
2692 arglen = -1;
2693 argidx = -2;
2694 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002695 if (args->ob_type->tp_as_mapping)
2696 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002697 while (--fmtcnt >= 0) {
2698 if (*fmt != '%') {
2699 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002700 rescnt = fmtcnt + 100;
2701 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002702 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002703 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002704 res = PyString_AsString(result)
2705 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002706 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002707 }
2708 *res++ = *fmt++;
2709 }
2710 else {
2711 /* Got a format specifier */
2712 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002713 int width = -1;
2714 int prec = -1;
2715 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002716 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002717 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002718 PyObject *v = NULL;
2719 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002720 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002721 int sign;
2722 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002723 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002724 char *fmt_start = fmt;
2725
Guido van Rossumda9c2711996-12-05 21:58:58 +00002726 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002727 if (*fmt == '(') {
2728 char *keystart;
2729 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002730 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002731 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002732
2733 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002734 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002735 "format requires a mapping");
2736 goto error;
2737 }
2738 ++fmt;
2739 --fmtcnt;
2740 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002741 /* Skip over balanced parentheses */
2742 while (pcount > 0 && --fmtcnt >= 0) {
2743 if (*fmt == ')')
2744 --pcount;
2745 else if (*fmt == '(')
2746 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002747 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002748 }
2749 keylen = fmt - keystart - 1;
2750 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002751 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002752 "incomplete format key");
2753 goto error;
2754 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002755 key = PyString_FromStringAndSize(keystart,
2756 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002757 if (key == NULL)
2758 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002759 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002760 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002761 args_owned = 0;
2762 }
2763 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002764 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002765 if (args == NULL) {
2766 goto error;
2767 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002768 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002769 arglen = -1;
2770 argidx = -2;
2771 }
Guido van Rossume5372401993-03-16 12:15:04 +00002772 while (--fmtcnt >= 0) {
2773 switch (c = *fmt++) {
2774 case '-': flags |= F_LJUST; continue;
2775 case '+': flags |= F_SIGN; continue;
2776 case ' ': flags |= F_BLANK; continue;
2777 case '#': flags |= F_ALT; continue;
2778 case '0': flags |= F_ZERO; continue;
2779 }
2780 break;
2781 }
2782 if (c == '*') {
2783 v = getnextarg(args, arglen, &argidx);
2784 if (v == NULL)
2785 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002786 if (!PyInt_Check(v)) {
2787 PyErr_SetString(PyExc_TypeError,
2788 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002789 goto error;
2790 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002791 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002792 if (width < 0) {
2793 flags |= F_LJUST;
2794 width = -width;
2795 }
Guido van Rossume5372401993-03-16 12:15:04 +00002796 if (--fmtcnt >= 0)
2797 c = *fmt++;
2798 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002799 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002800 width = c - '0';
2801 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002802 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002803 if (!isdigit(c))
2804 break;
2805 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002806 PyErr_SetString(
2807 PyExc_ValueError,
2808 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002809 goto error;
2810 }
2811 width = width*10 + (c - '0');
2812 }
2813 }
2814 if (c == '.') {
2815 prec = 0;
2816 if (--fmtcnt >= 0)
2817 c = *fmt++;
2818 if (c == '*') {
2819 v = getnextarg(args, arglen, &argidx);
2820 if (v == NULL)
2821 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002822 if (!PyInt_Check(v)) {
2823 PyErr_SetString(
2824 PyExc_TypeError,
2825 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002826 goto error;
2827 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002828 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002829 if (prec < 0)
2830 prec = 0;
2831 if (--fmtcnt >= 0)
2832 c = *fmt++;
2833 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002834 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002835 prec = c - '0';
2836 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002837 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002838 if (!isdigit(c))
2839 break;
2840 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002841 PyErr_SetString(
2842 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002843 "prec too big");
2844 goto error;
2845 }
2846 prec = prec*10 + (c - '0');
2847 }
2848 }
2849 } /* prec */
2850 if (fmtcnt >= 0) {
2851 if (c == 'h' || c == 'l' || c == 'L') {
2852 size = c;
2853 if (--fmtcnt >= 0)
2854 c = *fmt++;
2855 }
2856 }
2857 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002858 PyErr_SetString(PyExc_ValueError,
2859 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002860 goto error;
2861 }
2862 if (c != '%') {
2863 v = getnextarg(args, arglen, &argidx);
2864 if (v == NULL)
2865 goto error;
2866 }
2867 sign = 0;
2868 fill = ' ';
2869 switch (c) {
2870 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002871 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002872 len = 1;
2873 break;
2874 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002875 case 'r':
2876 if (PyUnicode_Check(v)) {
2877 fmt = fmt_start;
2878 goto unicode;
2879 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002880 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002881 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002882 else
2883 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002884 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002885 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002886 if (!PyString_Check(temp)) {
2887 PyErr_SetString(PyExc_TypeError,
2888 "%s argument has non-string str()");
2889 goto error;
2890 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002891 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002892 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002893 if (prec >= 0 && len > prec)
2894 len = prec;
2895 break;
2896 case 'i':
2897 case 'd':
2898 case 'u':
2899 case 'o':
2900 case 'x':
2901 case 'X':
2902 if (c == 'i')
2903 c = 'd';
Tim Peters38fd5b62000-09-21 05:43:11 +00002904 if (PyLong_Check(v) && PyLong_AsLong(v) == -1
2905 && PyErr_Occurred()) {
2906 /* Too big for a C long. */
2907 PyErr_Clear();
2908 temp = _PyString_FormatLong(v, flags,
2909 prec, c, &pbuf, &len);
2910 if (!temp)
2911 goto error;
2912 /* unbounded ints can always produce
2913 a sign character! */
2914 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002915 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002916 else {
2917 pbuf = formatbuf;
2918 len = formatint(pbuf, sizeof(formatbuf),
2919 flags, prec, c, v);
2920 if (len < 0)
2921 goto error;
2922 /* only d conversion is signed */
2923 sign = c == 'd';
2924 }
2925 if (flags & F_ZERO)
2926 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00002927 break;
2928 case 'e':
2929 case 'E':
2930 case 'f':
2931 case 'g':
2932 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002933 pbuf = formatbuf;
2934 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002935 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002936 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002937 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00002938 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00002939 fill = '0';
2940 break;
2941 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002942 pbuf = formatbuf;
2943 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002944 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002945 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002946 break;
2947 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002948 PyErr_Format(PyExc_ValueError,
2949 "unsupported format character '%c' (0x%x)",
2950 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002951 goto error;
2952 }
2953 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002954 if (*pbuf == '-' || *pbuf == '+') {
2955 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002956 len--;
2957 }
2958 else if (flags & F_SIGN)
2959 sign = '+';
2960 else if (flags & F_BLANK)
2961 sign = ' ';
2962 else
Tim Peters38fd5b62000-09-21 05:43:11 +00002963 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002964 }
2965 if (width < len)
2966 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00002967 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002968 reslen -= rescnt;
2969 rescnt = width + fmtcnt + 100;
2970 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002971 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002972 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002973 res = PyString_AsString(result)
2974 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002975 }
2976 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002977 if (fill != ' ')
2978 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002979 rescnt--;
2980 if (width > len)
2981 width--;
2982 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002983 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
2984 assert(pbuf[0] == '0');
2985 assert(pbuf[1] == c);
2986 if (fill != ' ') {
2987 *res++ = *pbuf++;
2988 *res++ = *pbuf++;
2989 }
2990 rescnt -= 2;
2991 width -= 2;
2992 if (width < 0)
2993 width = 0;
2994 len -= 2;
2995 }
2996 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002997 do {
2998 --rescnt;
2999 *res++ = fill;
3000 } while (--width > len);
3001 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003002 if (fill == ' ') {
3003 if (sign)
3004 *res++ = sign;
3005 if ((flags & F_ALT) &&
3006 (c == 'x' || c == 'X')) {
3007 assert(pbuf[0] == '0');
3008 assert(pbuf[1] == c);
3009 *res++ = *pbuf++;
3010 *res++ = *pbuf++;
3011 }
3012 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003013 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003014 res += len;
3015 rescnt -= len;
3016 while (--width >= len) {
3017 --rescnt;
3018 *res++ = ' ';
3019 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003020 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003021 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003022 "not all arguments converted");
3023 goto error;
3024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003025 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003026 } /* '%' */
3027 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003028 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003029 PyErr_SetString(PyExc_TypeError,
3030 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003031 goto error;
3032 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003033 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003034 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003035 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003036 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003037 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003038
3039 unicode:
3040 if (args_owned) {
3041 Py_DECREF(args);
3042 args_owned = 0;
3043 }
3044 /* Fiddle args right (remove the first argidx-1 arguments) */
3045 --argidx;
3046 if (PyTuple_Check(orig_args) && argidx > 0) {
3047 PyObject *v;
3048 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3049 v = PyTuple_New(n);
3050 if (v == NULL)
3051 goto error;
3052 while (--n >= 0) {
3053 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3054 Py_INCREF(w);
3055 PyTuple_SET_ITEM(v, n, w);
3056 }
3057 args = v;
3058 } else {
3059 Py_INCREF(orig_args);
3060 args = orig_args;
3061 }
3062 /* Paste rest of format string to what we have of the result
3063 string; we reuse result for this */
3064 rescnt = res - PyString_AS_STRING(result);
3065 fmtcnt = PyString_GET_SIZE(format) - \
3066 (fmt - PyString_AS_STRING(format));
3067 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
3068 Py_DECREF(args);
3069 goto error;
3070 }
3071 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
3072 format = result;
3073 /* Let Unicode do its magic */
3074 result = PyUnicode_Format(format, args);
3075 Py_DECREF(format);
3076 Py_DECREF(args);
3077 return result;
3078
Guido van Rossume5372401993-03-16 12:15:04 +00003079 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003080 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003081 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003082 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003083 }
Guido van Rossume5372401993-03-16 12:15:04 +00003084 return NULL;
3085}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003086
3087
3088#ifdef INTERN_STRINGS
3089
Barry Warsaw4df762f2000-08-16 23:41:01 +00003090/* This dictionary will leak at PyString_Fini() time. That's acceptable
3091 * because PyString_Fini() specifically frees interned strings that are
3092 * only referenced by this dictionary. The CVS log entry for revision 2.45
3093 * says:
3094 *
3095 * Change the Fini function to only remove otherwise unreferenced
3096 * strings from the interned table. There are references in
3097 * hard-to-find static variables all over the interpreter, and it's not
3098 * worth trying to get rid of all those; but "uninterning" isn't fair
3099 * either and may cause subtle failures later -- so we have to keep them
3100 * in the interned table.
3101 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003102static PyObject *interned;
3103
3104void
Fred Drakeba096332000-07-09 07:04:36 +00003105PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003106{
3107 register PyStringObject *s = (PyStringObject *)(*p);
3108 PyObject *t;
3109 if (s == NULL || !PyString_Check(s))
3110 Py_FatalError("PyString_InternInPlace: strings only please!");
3111 if ((t = s->ob_sinterned) != NULL) {
3112 if (t == (PyObject *)s)
3113 return;
3114 Py_INCREF(t);
3115 *p = t;
3116 Py_DECREF(s);
3117 return;
3118 }
3119 if (interned == NULL) {
3120 interned = PyDict_New();
3121 if (interned == NULL)
3122 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003123 }
3124 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3125 Py_INCREF(t);
3126 *p = s->ob_sinterned = t;
3127 Py_DECREF(s);
3128 return;
3129 }
3130 t = (PyObject *)s;
3131 if (PyDict_SetItem(interned, t, t) == 0) {
3132 s->ob_sinterned = t;
3133 return;
3134 }
3135 PyErr_Clear();
3136}
3137
3138
3139PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003140PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003141{
3142 PyObject *s = PyString_FromString(cp);
3143 if (s == NULL)
3144 return NULL;
3145 PyString_InternInPlace(&s);
3146 return s;
3147}
3148
3149#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003150
3151void
Fred Drakeba096332000-07-09 07:04:36 +00003152PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003153{
3154 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003155 for (i = 0; i < UCHAR_MAX + 1; i++) {
3156 Py_XDECREF(characters[i]);
3157 characters[i] = NULL;
3158 }
3159#ifndef DONT_SHARE_SHORT_STRINGS
3160 Py_XDECREF(nullstring);
3161 nullstring = NULL;
3162#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003163#ifdef INTERN_STRINGS
3164 if (interned) {
3165 int pos, changed;
3166 PyObject *key, *value;
3167 do {
3168 changed = 0;
3169 pos = 0;
3170 while (PyDict_Next(interned, &pos, &key, &value)) {
3171 if (key->ob_refcnt == 2 && key == value) {
3172 PyDict_DelItem(interned, key);
3173 changed = 1;
3174 }
3175 }
3176 } while (changed);
3177 }
3178#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003179}