blob: 80da47d3e9623c0db9afb88493c4123e15890109 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* String object implementation */
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000014
Guido van Rossum71160aa1997-06-03 18:03:18 +000015#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000016#include <ctype.h>
17
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000018#ifdef COUNT_ALLOCS
19int null_strings, one_strings;
20#endif
21
Guido van Rossum03093a21994-09-28 15:51:32 +000022#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000023#include <limits.h>
24#else
25#ifndef UCHAR_MAX
26#define UCHAR_MAX 255
27#endif
28#endif
29
Guido van Rossumc0b618a1997-05-02 03:12:38 +000030static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000031#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000032static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000033#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000034
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000050PyObject *
51PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000052 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053 int size;
54{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000056#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000073#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
75 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000078 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000079 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000080 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081#ifdef CACHE_HASH
82 op->ob_shash = -1;
83#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000084#ifdef INTERN_STRINGS
85 op->ob_sinterned = NULL;
86#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (str != NULL)
88 memcpy(op->ob_sval, str, size);
89 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000090#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 if (size == 0) {
92 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
95 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000098#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
103PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000104 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000106 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000113#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000128#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
130 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000133 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000135 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136#ifdef CACHE_HASH
137 op->ob_shash = -1;
138#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000139#ifdef INTERN_STRINGS
140 op->ob_sinterned = NULL;
141#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000142 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000143#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
145 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 } else if (size == 1) {
148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000151#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000155PyObject *PyString_Decode(const char *s,
156 int size,
157 const char *encoding,
158 const char *errors)
159{
160 PyObject *buffer = NULL, *str;
161
162 if (encoding == NULL)
163 encoding = PyUnicode_GetDefaultEncoding();
164
165 /* Decode via the codec registry */
166 buffer = PyBuffer_FromMemory((void *)s, size);
167 if (buffer == NULL)
168 goto onError;
169 str = PyCodec_Decode(buffer, encoding, errors);
170 if (str == NULL)
171 goto onError;
172 /* Convert Unicode to a string using the default encoding */
173 if (PyUnicode_Check(str)) {
174 PyObject *temp = str;
175 str = PyUnicode_AsEncodedString(str, NULL, NULL);
176 Py_DECREF(temp);
177 if (str == NULL)
178 goto onError;
179 }
180 if (!PyString_Check(str)) {
181 PyErr_Format(PyExc_TypeError,
182 "decoder did not return an string object (type=%.400s)",
183 str->ob_type->tp_name);
184 Py_DECREF(str);
185 goto onError;
186 }
187 Py_DECREF(buffer);
188 return str;
189
190 onError:
191 Py_XDECREF(buffer);
192 return NULL;
193}
194
195PyObject *PyString_Encode(const char *s,
196 int size,
197 const char *encoding,
198 const char *errors)
199{
200 PyObject *v, *str;
201
202 str = PyString_FromStringAndSize(s, size);
203 if (str == NULL)
204 return NULL;
205 v = PyString_AsEncodedString(str, encoding, errors);
206 Py_DECREF(str);
207 return v;
208}
209
210PyObject *PyString_AsEncodedString(PyObject *str,
211 const char *encoding,
212 const char *errors)
213{
214 PyObject *v;
215
216 if (!PyString_Check(str)) {
217 PyErr_BadArgument();
218 goto onError;
219 }
220
221 if (encoding == NULL)
222 encoding = PyUnicode_GetDefaultEncoding();
223
224 /* Encode via the codec registry */
225 v = PyCodec_Encode(str, encoding, errors);
226 if (v == NULL)
227 goto onError;
228 /* Convert Unicode to a string using the default encoding */
229 if (PyUnicode_Check(v)) {
230 PyObject *temp = v;
231 v = PyUnicode_AsEncodedString(v, NULL, NULL);
232 Py_DECREF(temp);
233 if (v == NULL)
234 goto onError;
235 }
236 if (!PyString_Check(v)) {
237 PyErr_Format(PyExc_TypeError,
238 "encoder did not return a string object (type=%.400s)",
239 v->ob_type->tp_name);
240 Py_DECREF(v);
241 goto onError;
242 }
243 return v;
244
245 onError:
246 return NULL;
247}
248
Guido van Rossum234f9421993-06-17 12:35:49 +0000249static void
Guido van Rossume5372401993-03-16 12:15:04 +0000250string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000251 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000252{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000253 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000254}
255
Guido van Rossumd7047b31995-01-02 19:07:15 +0000256int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000257PyString_Size(op)
258 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000260 if (!PyString_Check(op)) {
261 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 return -1;
263 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000264 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265}
266
267/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000268PyString_AsString(op)
269 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000270{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 if (!PyString_Check(op)) {
272 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000273 return NULL;
274 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000275 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276}
277
278/* Methods */
279
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000280static int
Guido van Rossume5372401993-03-16 12:15:04 +0000281string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000282 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000283 FILE *fp;
284 int flags;
285{
286 int i;
287 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000288 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000289 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000290 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000292 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000293 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000294
295 /* figure out which quote to use; single is prefered */
296 quote = '\'';
297 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
298 quote = '"';
299
300 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000301 for (i = 0; i < op->ob_size; i++) {
302 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000303 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000304 fprintf(fp, "\\%c", c);
305 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000306 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000308 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000309 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000310 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000311 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000312}
313
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000314static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000315string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000316 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000317{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000318 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
319 PyObject *v;
320 if (newsize > INT_MAX) {
321 PyErr_SetString(PyExc_OverflowError,
322 "string is too large to make repr");
323 }
324 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000325 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000326 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000327 }
328 else {
329 register int i;
330 register char c;
331 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000332 int quote;
333
334 /* figure out which quote to use; single is prefered */
335 quote = '\'';
336 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
337 quote = '"';
338
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000339 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000340 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000341 for (i = 0; i < op->ob_size; i++) {
342 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 *p++ = '\\', *p++ = c;
345 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000346 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000347 while (*p != '\0')
348 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349 }
350 else
351 *p++ = c;
352 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000353 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000354 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000355 _PyString_Resize(
356 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000357 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359}
360
361static int
Guido van Rossume5372401993-03-16 12:15:04 +0000362string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000363 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000364{
365 return a->ob_size;
366}
367
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000368static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000369string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000370 register PyStringObject *a;
371 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000372{
373 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000374 register PyStringObject *op;
375 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000376 if (PyUnicode_Check(bb))
377 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000378 PyErr_Format(PyExc_TypeError,
379 "cannot add type \"%.200s\" to string",
380 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000381 return NULL;
382 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000383#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000384 /* Optimize cases with empty left or right operand */
385 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000386 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387 return bb;
388 }
389 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000390 Py_INCREF(a);
391 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000392 }
393 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000394 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000395 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000396 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000397 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000398 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000399 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000400#ifdef CACHE_HASH
401 op->ob_shash = -1;
402#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000403#ifdef INTERN_STRINGS
404 op->ob_sinterned = NULL;
405#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000406 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
407 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
408 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410#undef b
411}
412
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000413static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000414string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 register int n;
417{
418 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000419 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000420 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000421 if (n < 0)
422 n = 0;
423 size = a->ob_size * n;
424 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000425 Py_INCREF(a);
426 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000428 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000429 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000430 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000431 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000432 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000433 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000434#ifdef CACHE_HASH
435 op->ob_shash = -1;
436#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000437#ifdef INTERN_STRINGS
438 op->ob_sinterned = NULL;
439#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000440 for (i = 0; i < size; i += a->ob_size)
441 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
442 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000443 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444}
445
446/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
447
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000448static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000449string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000450 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000451 register int i, j; /* May be negative! */
452{
453 if (i < 0)
454 i = 0;
455 if (j < 0)
456 j = 0; /* Avoid signed/unsigned bug in next line */
457 if (j > a->ob_size)
458 j = a->ob_size;
459 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000460 Py_INCREF(a);
461 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000462 }
463 if (j < i)
464 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000465 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000466}
467
Guido van Rossum9284a572000-03-07 15:53:43 +0000468static int
469string_contains(a, el)
470PyObject *a, *el;
471{
472 register char *s, *end;
473 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000474 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000475 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000476 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000477 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000478 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000479 return -1;
480 }
481 c = PyString_AsString(el)[0];
482 s = PyString_AsString(a);
483 end = s + PyString_Size(a);
484 while (s < end) {
485 if (c == *s++)
486 return 1;
487 }
488 return 0;
489}
490
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000491static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000492string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000493 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000494 register int i;
495{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000496 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000497 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000498 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000499 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000500 return NULL;
501 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000502 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000503 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000504#ifdef COUNT_ALLOCS
505 if (v != NULL)
506 one_strings++;
507#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000508 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000509 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000510 if (v == NULL)
511 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000512 characters[c] = (PyStringObject *) v;
513 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000514 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000515 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000516 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517}
518
519static int
Guido van Rossume5372401993-03-16 12:15:04 +0000520string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522{
Guido van Rossum253919f1991-02-13 23:18:39 +0000523 int len_a = a->ob_size, len_b = b->ob_size;
524 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000525 int cmp;
526 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000527 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000528 if (cmp == 0)
529 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
530 if (cmp != 0)
531 return cmp;
532 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000533 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534}
535
Guido van Rossum9bfef441993-03-29 10:43:31 +0000536static long
537string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000538 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000539{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000540 register int len;
541 register unsigned char *p;
542 register long x;
543
544#ifdef CACHE_HASH
545 if (a->ob_shash != -1)
546 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000547#ifdef INTERN_STRINGS
548 if (a->ob_sinterned != NULL)
549 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000550 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000551#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000552#endif
553 len = a->ob_size;
554 p = (unsigned char *) a->ob_sval;
555 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000556 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000557 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000558 x ^= a->ob_size;
559 if (x == -1)
560 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000561#ifdef CACHE_HASH
562 a->ob_shash = x;
563#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000564 return x;
565}
566
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000567static int
568string_buffer_getreadbuf(self, index, ptr)
569 PyStringObject *self;
570 int index;
571 const void **ptr;
572{
573 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000574 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000575 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000576 return -1;
577 }
578 *ptr = (void *)self->ob_sval;
579 return self->ob_size;
580}
581
582static int
583string_buffer_getwritebuf(self, index, ptr)
584 PyStringObject *self;
585 int index;
586 const void **ptr;
587{
Guido van Rossum045e6881997-09-08 18:30:11 +0000588 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000589 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000590 return -1;
591}
592
593static int
594string_buffer_getsegcount(self, lenp)
595 PyStringObject *self;
596 int *lenp;
597{
598 if ( lenp )
599 *lenp = self->ob_size;
600 return 1;
601}
602
Guido van Rossum1db70701998-10-08 02:18:52 +0000603static int
604string_buffer_getcharbuf(self, index, ptr)
605 PyStringObject *self;
606 int index;
607 const char **ptr;
608{
609 if ( index != 0 ) {
610 PyErr_SetString(PyExc_SystemError,
611 "accessing non-existent string segment");
612 return -1;
613 }
614 *ptr = self->ob_sval;
615 return self->ob_size;
616}
617
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000618static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000619 (inquiry)string_length, /*sq_length*/
620 (binaryfunc)string_concat, /*sq_concat*/
621 (intargfunc)string_repeat, /*sq_repeat*/
622 (intargfunc)string_item, /*sq_item*/
623 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000624 0, /*sq_ass_item*/
625 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000626 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000627};
628
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000629static PyBufferProcs string_as_buffer = {
630 (getreadbufferproc)string_buffer_getreadbuf,
631 (getwritebufferproc)string_buffer_getwritebuf,
632 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000633 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000634};
635
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000636
637
638#define LEFTSTRIP 0
639#define RIGHTSTRIP 1
640#define BOTHSTRIP 2
641
642
643static PyObject *
644split_whitespace(s, len, maxsplit)
645 char *s;
646 int len;
647 int maxsplit;
648{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000649 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000650 PyObject* item;
651 PyObject *list = PyList_New(0);
652
653 if (list == NULL)
654 return NULL;
655
Guido van Rossum4c08d552000-03-10 22:55:18 +0000656 for (i = j = 0; i < len; ) {
657 while (i < len && isspace(Py_CHARMASK(s[i])))
658 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000659 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000660 while (i < len && !isspace(Py_CHARMASK(s[i])))
661 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000662 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000663 if (maxsplit-- <= 0)
664 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665 item = PyString_FromStringAndSize(s+j, (int)(i-j));
666 if (item == NULL)
667 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000668 err = PyList_Append(list, item);
669 Py_DECREF(item);
670 if (err < 0)
671 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000672 while (i < len && isspace(Py_CHARMASK(s[i])))
673 i++;
674 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000675 }
676 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000677 if (j < len) {
678 item = PyString_FromStringAndSize(s+j, (int)(len - j));
679 if (item == NULL)
680 goto finally;
681 err = PyList_Append(list, item);
682 Py_DECREF(item);
683 if (err < 0)
684 goto finally;
685 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000686 return list;
687 finally:
688 Py_DECREF(list);
689 return NULL;
690}
691
692
/* Docstring for the split() string method. */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000700
701static PyObject *
702string_split(self, args)
703 PyStringObject *self;
704 PyObject *args;
705{
706 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000707 int maxsplit = -1;
708 const char *s = PyString_AS_STRING(self), *sub;
709 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000710
Guido van Rossum4c08d552000-03-10 22:55:18 +0000711 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000712 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000713 if (maxsplit < 0)
714 maxsplit = INT_MAX;
715 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000716 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000717 if (PyString_Check(subobj)) {
718 sub = PyString_AS_STRING(subobj);
719 n = PyString_GET_SIZE(subobj);
720 }
721 else if (PyUnicode_Check(subobj))
722 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
723 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
724 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000725 if (n == 0) {
726 PyErr_SetString(PyExc_ValueError, "empty separator");
727 return NULL;
728 }
729
730 list = PyList_New(0);
731 if (list == NULL)
732 return NULL;
733
734 i = j = 0;
735 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000736 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (maxsplit-- <= 0)
738 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000739 item = PyString_FromStringAndSize(s+j, (int)(i-j));
740 if (item == NULL)
741 goto fail;
742 err = PyList_Append(list, item);
743 Py_DECREF(item);
744 if (err < 0)
745 goto fail;
746 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000747 }
748 else
749 i++;
750 }
751 item = PyString_FromStringAndSize(s+j, (int)(len-j));
752 if (item == NULL)
753 goto fail;
754 err = PyList_Append(list, item);
755 Py_DECREF(item);
756 if (err < 0)
757 goto fail;
758
759 return list;
760
761 fail:
762 Py_DECREF(list);
763 return NULL;
764}
765
766
767static char join__doc__[] =
768"S.join(sequence) -> string\n\
769\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000770Return a string which is the concatenation of the strings in the\n\
771sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000772
773static PyObject *
774string_join(self, args)
775 PyStringObject *self;
776 PyObject *args;
777{
778 char *sep = PyString_AS_STRING(self);
779 int seplen = PyString_GET_SIZE(self);
780 PyObject *res = NULL;
781 int reslen = 0;
782 char *p;
783 int seqlen = 0;
784 int sz = 100;
785 int i, slen;
786 PyObject *seq;
787
Guido van Rossum43713e52000-02-29 13:59:29 +0000788 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 return NULL;
790
791 seqlen = PySequence_Length(seq);
792 if (seqlen < 0 && PyErr_Occurred())
793 return NULL;
794
795 if (seqlen == 1) {
796 /* Optimization if there's only one item */
797 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000798 if (item == NULL)
799 return NULL;
800 if (!PyString_Check(item) &&
801 !PyUnicode_Check(item)) {
802 PyErr_SetString(PyExc_TypeError,
803 "first argument must be sequence of strings");
804 Py_DECREF(item);
805 return NULL;
806 }
807 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000808 }
809 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
810 return NULL;
811 p = PyString_AsString(res);
812
813 /* optimize for lists. all others (tuples and arbitrary sequences)
814 * just use the abstract interface.
815 */
816 if (PyList_Check(seq)) {
817 for (i = 0; i < seqlen; i++) {
818 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000819 if (!PyString_Check(item)){
820 if (PyUnicode_Check(item)) {
821 Py_DECREF(res);
822 return PyUnicode_Join(
823 (PyObject *)self,
824 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000825 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000826 PyErr_Format(PyExc_TypeError,
827 "sequence item %i not a string",
828 i);
829 goto finally;
830 }
831 slen = PyString_GET_SIZE(item);
832 while (reslen + slen + seplen >= sz) {
833 if (_PyString_Resize(&res, sz*2))
834 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000835 sz *= 2;
836 p = PyString_AsString(res) + reslen;
837 }
838 if (i > 0) {
839 memcpy(p, sep, seplen);
840 p += seplen;
841 reslen += seplen;
842 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000843 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000844 p += slen;
845 reslen += slen;
846 }
847 }
848 else {
849 for (i = 0; i < seqlen; i++) {
850 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000851 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000852 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000853 if (!PyString_Check(item)){
854 if (PyUnicode_Check(item)) {
855 Py_DECREF(res);
856 Py_DECREF(item);
857 return PyUnicode_Join(
858 (PyObject *)self,
859 seq);
860 }
861 Py_DECREF(item);
862 PyErr_Format(PyExc_TypeError,
863 "sequence item %i not a string",
864 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000865 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000866 }
867 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000869 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000870 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000871 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000872 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000873 sz *= 2;
874 p = PyString_AsString(res) + reslen;
875 }
876 if (i > 0) {
877 memcpy(p, sep, seplen);
878 p += seplen;
879 reslen += seplen;
880 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000881 memcpy(p, PyString_AS_STRING(item), slen);
882 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000883 p += slen;
884 reslen += slen;
885 }
886 }
887 if (_PyString_Resize(&res, reslen))
888 goto finally;
889 return res;
890
891 finally:
892 Py_DECREF(res);
893 return NULL;
894}
895
896
897
898static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000899string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000900 PyStringObject *self;
901 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000902 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000903{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000904 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000905 int len = PyString_GET_SIZE(self);
906 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000907 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000908
Guido van Rossumc6821402000-05-08 14:08:05 +0000909 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
910 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000911 return -2;
912 if (PyString_Check(subobj)) {
913 sub = PyString_AS_STRING(subobj);
914 n = PyString_GET_SIZE(subobj);
915 }
916 else if (PyUnicode_Check(subobj))
917 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
918 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000919 return -2;
920
921 if (last > len)
922 last = len;
923 if (last < 0)
924 last += len;
925 if (last < 0)
926 last = 0;
927 if (i < 0)
928 i += len;
929 if (i < 0)
930 i = 0;
931
Guido van Rossum4c08d552000-03-10 22:55:18 +0000932 if (dir > 0) {
933 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000934 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000935 last -= n;
936 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000937 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000938 return (long)i;
939 }
940 else {
941 int j;
942
943 if (n == 0 && i <= last)
944 return (long)last;
945 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000946 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000947 return (long)j;
948 }
949
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000950 return -1;
951}
952
953
954static char find__doc__[] =
955"S.find(sub [,start [,end]]) -> int\n\
956\n\
957Return the lowest index in S where substring sub is found,\n\
958such that sub is contained within s[start,end]. Optional\n\
959arguments start and end are interpreted as in slice notation.\n\
960\n\
961Return -1 on failure.";
962
963static PyObject *
964string_find(self, args)
965 PyStringObject *self;
966 PyObject *args;
967{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000968 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000969 if (result == -2)
970 return NULL;
971 return PyInt_FromLong(result);
972}
973
974
975static char index__doc__[] =
976"S.index(sub [,start [,end]]) -> int\n\
977\n\
978Like S.find() but raise ValueError when the substring is not found.";
979
980static PyObject *
981string_index(self, args)
982 PyStringObject *self;
983 PyObject *args;
984{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000985 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000986 if (result == -2)
987 return NULL;
988 if (result == -1) {
989 PyErr_SetString(PyExc_ValueError,
990 "substring not found in string.index");
991 return NULL;
992 }
993 return PyInt_FromLong(result);
994}
995
996
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000997static char rfind__doc__[] =
998"S.rfind(sub [,start [,end]]) -> int\n\
999\n\
1000Return the highest index in S where substring sub is found,\n\
1001such that sub is contained within s[start,end]. Optional\n\
1002arguments start and end are interpreted as in slice notation.\n\
1003\n\
1004Return -1 on failure.";
1005
1006static PyObject *
1007string_rfind(self, args)
1008 PyStringObject *self;
1009 PyObject *args;
1010{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001011 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001012 if (result == -2)
1013 return NULL;
1014 return PyInt_FromLong(result);
1015}
1016
1017
1018static char rindex__doc__[] =
1019"S.rindex(sub [,start [,end]]) -> int\n\
1020\n\
1021Like S.rfind() but raise ValueError when the substring is not found.";
1022
1023static PyObject *
1024string_rindex(self, args)
1025 PyStringObject *self;
1026 PyObject *args;
1027{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001028 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001029 if (result == -2)
1030 return NULL;
1031 if (result == -1) {
1032 PyErr_SetString(PyExc_ValueError,
1033 "substring not found in string.rindex");
1034 return NULL;
1035 }
1036 return PyInt_FromLong(result);
1037}
1038
1039
1040static PyObject *
1041do_strip(self, args, striptype)
1042 PyStringObject *self;
1043 PyObject *args;
1044 int striptype;
1045{
1046 char *s = PyString_AS_STRING(self);
1047 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001048
Guido van Rossum43713e52000-02-29 13:59:29 +00001049 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050 return NULL;
1051
1052 i = 0;
1053 if (striptype != RIGHTSTRIP) {
1054 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1055 i++;
1056 }
1057 }
1058
1059 j = len;
1060 if (striptype != LEFTSTRIP) {
1061 do {
1062 j--;
1063 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1064 j++;
1065 }
1066
1067 if (i == 0 && j == len) {
1068 Py_INCREF(self);
1069 return (PyObject*)self;
1070 }
1071 else
1072 return PyString_FromStringAndSize(s+i, j-i);
1073}
1074
1075
1076static char strip__doc__[] =
1077"S.strip() -> string\n\
1078\n\
1079Return a copy of the string S with leading and trailing\n\
1080whitespace removed.";
1081
1082static PyObject *
1083string_strip(self, args)
1084 PyStringObject *self;
1085 PyObject *args;
1086{
1087 return do_strip(self, args, BOTHSTRIP);
1088}
1089
1090
1091static char lstrip__doc__[] =
1092"S.lstrip() -> string\n\
1093\n\
1094Return a copy of the string S with leading whitespace removed.";
1095
1096static PyObject *
1097string_lstrip(self, args)
1098 PyStringObject *self;
1099 PyObject *args;
1100{
1101 return do_strip(self, args, LEFTSTRIP);
1102}
1103
1104
1105static char rstrip__doc__[] =
1106"S.rstrip() -> string\n\
1107\n\
1108Return a copy of the string S with trailing whitespace removed.";
1109
1110static PyObject *
1111string_rstrip(self, args)
1112 PyStringObject *self;
1113 PyObject *args;
1114{
1115 return do_strip(self, args, RIGHTSTRIP);
1116}
1117
1118
1119static char lower__doc__[] =
1120"S.lower() -> string\n\
1121\n\
1122Return a copy of the string S converted to lowercase.";
1123
1124static PyObject *
1125string_lower(self, args)
1126 PyStringObject *self;
1127 PyObject *args;
1128{
1129 char *s = PyString_AS_STRING(self), *s_new;
1130 int i, n = PyString_GET_SIZE(self);
1131 PyObject *new;
1132
Guido van Rossum43713e52000-02-29 13:59:29 +00001133 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001134 return NULL;
1135 new = PyString_FromStringAndSize(NULL, n);
1136 if (new == NULL)
1137 return NULL;
1138 s_new = PyString_AsString(new);
1139 for (i = 0; i < n; i++) {
1140 int c = Py_CHARMASK(*s++);
1141 if (isupper(c)) {
1142 *s_new = tolower(c);
1143 } else
1144 *s_new = c;
1145 s_new++;
1146 }
1147 return new;
1148}
1149
1150
1151static char upper__doc__[] =
1152"S.upper() -> string\n\
1153\n\
1154Return a copy of the string S converted to uppercase.";
1155
1156static PyObject *
1157string_upper(self, args)
1158 PyStringObject *self;
1159 PyObject *args;
1160{
1161 char *s = PyString_AS_STRING(self), *s_new;
1162 int i, n = PyString_GET_SIZE(self);
1163 PyObject *new;
1164
Guido van Rossum43713e52000-02-29 13:59:29 +00001165 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001166 return NULL;
1167 new = PyString_FromStringAndSize(NULL, n);
1168 if (new == NULL)
1169 return NULL;
1170 s_new = PyString_AsString(new);
1171 for (i = 0; i < n; i++) {
1172 int c = Py_CHARMASK(*s++);
1173 if (islower(c)) {
1174 *s_new = toupper(c);
1175 } else
1176 *s_new = c;
1177 s_new++;
1178 }
1179 return new;
1180}
1181
1182
Guido van Rossum4c08d552000-03-10 22:55:18 +00001183static char title__doc__[] =
1184"S.title() -> string\n\
1185\n\
1186Return a titlecased version of S, i.e. words start with uppercase\n\
1187characters, all remaining cased characters have lowercase.";
1188
1189static PyObject*
1190string_title(PyUnicodeObject *self, PyObject *args)
1191{
1192 char *s = PyString_AS_STRING(self), *s_new;
1193 int i, n = PyString_GET_SIZE(self);
1194 int previous_is_cased = 0;
1195 PyObject *new;
1196
1197 if (!PyArg_ParseTuple(args, ":title"))
1198 return NULL;
1199 new = PyString_FromStringAndSize(NULL, n);
1200 if (new == NULL)
1201 return NULL;
1202 s_new = PyString_AsString(new);
1203 for (i = 0; i < n; i++) {
1204 int c = Py_CHARMASK(*s++);
1205 if (islower(c)) {
1206 if (!previous_is_cased)
1207 c = toupper(c);
1208 previous_is_cased = 1;
1209 } else if (isupper(c)) {
1210 if (previous_is_cased)
1211 c = tolower(c);
1212 previous_is_cased = 1;
1213 } else
1214 previous_is_cased = 0;
1215 *s_new++ = c;
1216 }
1217 return new;
1218}
1219
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001220static char capitalize__doc__[] =
1221"S.capitalize() -> string\n\
1222\n\
1223Return a copy of the string S with only its first character\n\
1224capitalized.";
1225
1226static PyObject *
1227string_capitalize(self, args)
1228 PyStringObject *self;
1229 PyObject *args;
1230{
1231 char *s = PyString_AS_STRING(self), *s_new;
1232 int i, n = PyString_GET_SIZE(self);
1233 PyObject *new;
1234
Guido van Rossum43713e52000-02-29 13:59:29 +00001235 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001236 return NULL;
1237 new = PyString_FromStringAndSize(NULL, n);
1238 if (new == NULL)
1239 return NULL;
1240 s_new = PyString_AsString(new);
1241 if (0 < n) {
1242 int c = Py_CHARMASK(*s++);
1243 if (islower(c))
1244 *s_new = toupper(c);
1245 else
1246 *s_new = c;
1247 s_new++;
1248 }
1249 for (i = 1; i < n; i++) {
1250 int c = Py_CHARMASK(*s++);
1251 if (isupper(c))
1252 *s_new = tolower(c);
1253 else
1254 *s_new = c;
1255 s_new++;
1256 }
1257 return new;
1258}
1259
1260
1261static char count__doc__[] =
1262"S.count(sub[, start[, end]]) -> int\n\
1263\n\
1264Return the number of occurrences of substring sub in string\n\
1265S[start:end]. Optional arguments start and end are\n\
1266interpreted as in slice notation.";
1267
1268static PyObject *
1269string_count(self, args)
1270 PyStringObject *self;
1271 PyObject *args;
1272{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001273 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001274 int len = PyString_GET_SIZE(self), n;
1275 int i = 0, last = INT_MAX;
1276 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001277 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278
Guido van Rossumc6821402000-05-08 14:08:05 +00001279 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1280 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001282
Guido van Rossum4c08d552000-03-10 22:55:18 +00001283 if (PyString_Check(subobj)) {
1284 sub = PyString_AS_STRING(subobj);
1285 n = PyString_GET_SIZE(subobj);
1286 }
1287 else if (PyUnicode_Check(subobj))
1288 return PyInt_FromLong(
1289 PyUnicode_Count((PyObject *)self, subobj, i, last));
1290 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1291 return NULL;
1292
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293 if (last > len)
1294 last = len;
1295 if (last < 0)
1296 last += len;
1297 if (last < 0)
1298 last = 0;
1299 if (i < 0)
1300 i += len;
1301 if (i < 0)
1302 i = 0;
1303 m = last + 1 - n;
1304 if (n == 0)
1305 return PyInt_FromLong((long) (m-i));
1306
1307 r = 0;
1308 while (i < m) {
1309 if (!memcmp(s+i, sub, n)) {
1310 r++;
1311 i += n;
1312 } else {
1313 i++;
1314 }
1315 }
1316 return PyInt_FromLong((long) r);
1317}
1318
1319
1320static char swapcase__doc__[] =
1321"S.swapcase() -> string\n\
1322\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001323Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324converted to lowercase and vice versa.";
1325
1326static PyObject *
1327string_swapcase(self, args)
1328 PyStringObject *self;
1329 PyObject *args;
1330{
1331 char *s = PyString_AS_STRING(self), *s_new;
1332 int i, n = PyString_GET_SIZE(self);
1333 PyObject *new;
1334
Guido van Rossum43713e52000-02-29 13:59:29 +00001335 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336 return NULL;
1337 new = PyString_FromStringAndSize(NULL, n);
1338 if (new == NULL)
1339 return NULL;
1340 s_new = PyString_AsString(new);
1341 for (i = 0; i < n; i++) {
1342 int c = Py_CHARMASK(*s++);
1343 if (islower(c)) {
1344 *s_new = toupper(c);
1345 }
1346 else if (isupper(c)) {
1347 *s_new = tolower(c);
1348 }
1349 else
1350 *s_new = c;
1351 s_new++;
1352 }
1353 return new;
1354}
1355
1356
1357static char translate__doc__[] =
1358"S.translate(table [,deletechars]) -> string\n\
1359\n\
1360Return a copy of the string S, where all characters occurring\n\
1361in the optional argument deletechars are removed, and the\n\
1362remaining characters have been mapped through the given\n\
1363translation table, which must be a string of length 256.";
1364
1365static PyObject *
1366string_translate(self, args)
1367 PyStringObject *self;
1368 PyObject *args;
1369{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001370 register char *input, *output;
1371 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372 register int i, c, changed = 0;
1373 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001374 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375 int inlen, tablen, dellen = 0;
1376 PyObject *result;
1377 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001378 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379
Guido van Rossum4c08d552000-03-10 22:55:18 +00001380 if (!PyArg_ParseTuple(args, "O|O:translate",
1381 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001383
1384 if (PyString_Check(tableobj)) {
1385 table1 = PyString_AS_STRING(tableobj);
1386 tablen = PyString_GET_SIZE(tableobj);
1387 }
1388 else if (PyUnicode_Check(tableobj)) {
1389 /* Unicode .translate() does not support the deletechars
1390 parameter; instead a mapping to None will cause characters
1391 to be deleted. */
1392 if (delobj != NULL) {
1393 PyErr_SetString(PyExc_TypeError,
1394 "deletions are implemented differently for unicode");
1395 return NULL;
1396 }
1397 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1398 }
1399 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001401
1402 if (delobj != NULL) {
1403 if (PyString_Check(delobj)) {
1404 del_table = PyString_AS_STRING(delobj);
1405 dellen = PyString_GET_SIZE(delobj);
1406 }
1407 else if (PyUnicode_Check(delobj)) {
1408 PyErr_SetString(PyExc_TypeError,
1409 "deletions are implemented differently for unicode");
1410 return NULL;
1411 }
1412 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1413 return NULL;
1414
1415 if (tablen != 256) {
1416 PyErr_SetString(PyExc_ValueError,
1417 "translation table must be 256 characters long");
1418 return NULL;
1419 }
1420 }
1421 else {
1422 del_table = NULL;
1423 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424 }
1425
1426 table = table1;
1427 inlen = PyString_Size(input_obj);
1428 result = PyString_FromStringAndSize((char *)NULL, inlen);
1429 if (result == NULL)
1430 return NULL;
1431 output_start = output = PyString_AsString(result);
1432 input = PyString_AsString(input_obj);
1433
1434 if (dellen == 0) {
1435 /* If no deletions are required, use faster code */
1436 for (i = inlen; --i >= 0; ) {
1437 c = Py_CHARMASK(*input++);
1438 if (Py_CHARMASK((*output++ = table[c])) != c)
1439 changed = 1;
1440 }
1441 if (changed)
1442 return result;
1443 Py_DECREF(result);
1444 Py_INCREF(input_obj);
1445 return input_obj;
1446 }
1447
1448 for (i = 0; i < 256; i++)
1449 trans_table[i] = Py_CHARMASK(table[i]);
1450
1451 for (i = 0; i < dellen; i++)
1452 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1453
1454 for (i = inlen; --i >= 0; ) {
1455 c = Py_CHARMASK(*input++);
1456 if (trans_table[c] != -1)
1457 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1458 continue;
1459 changed = 1;
1460 }
1461 if (!changed) {
1462 Py_DECREF(result);
1463 Py_INCREF(input_obj);
1464 return input_obj;
1465 }
1466 /* Fix the size of the resulting string */
1467 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1468 return NULL;
1469 return result;
1470}
1471
1472
1473/* What follows is used for implementing replace(). Perry Stoll. */
1474
/*
  mymemfind

  A strstr() work-alike for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first place where the PAT_LEN bytes
  at PAT occur and returns that offset into MEM.  Returns -1 when there
  is no such place, which includes the case where PAT is longer than MEM.
*/
static int
mymemfind(char *mem, int len, char *pat, int pat_len)
{
    /* a match cannot start within the final pat_len-1 bytes */
    int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1504
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, found left to
  right with mymemfind().  For example mem=1111 with pat=11 gives 2, and
  mem=11111 with pat=11 gives 2 as well.
 */
static int
mymemcnt(char *mem, int len, char *pat, int pat_len)
{
    int hits = 0;
    int where;

    /* hits is bumped only after an iteration that found a match */
    for (; len >= 0; hits++) {
        where = mymemfind(mem, len, pat, pat_len);
        if (where == -1)
            break;
        /* resume the search just past this match */
        mem += where + pat_len;
        len -= where + pat_len;
    }
    return hits;
}
1532
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB, at most COUNT of them (all of them if COUNT is
   negative).

   If STR is empty, the length of PAT is greater than the length of STR,
   or there is nothing to replace, the original string is returned.
   Otherwise a new string is allocated here with PyMem_MALLOC and
   returned; the caller must release it with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       STR itself (when out_len is -1), or
       NULL if an error occurred.

   The input pointers are only read, so they are const-qualified; the
   casts below exist only because mymemcnt/mymemfind take plain char *.
*/
static char *
mymemreplace(const char *str, int len,       /* input string */
             const char *pat, int pat_len,   /* pattern string to find */
             const char *sub, int sub_len,   /* substitution string */
             int count,                      /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt((char *)str, len, (char *)pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;
    new_len = len + nfound*(sub_len - pat_len);

    /* new_len is 0 when every byte is replaced by an empty SUB.  A
       zero-byte allocation may return NULL on some platforms, which the
       caller would misreport as out-of-memory, so always request at
       least one byte; the caller still gets something to free. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind((char *)str, len, (char *)pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    /* The caller treats out_len == -1 as "input returned, do not free". */
    return (char *)str;
}
1615
1616
1617static char replace__doc__[] =
1618"S.replace (old, new[, maxsplit]) -> string\n\
1619\n\
1620Return a copy of string S with all occurrences of substring\n\
1621old replaced by new. If the optional argument maxsplit is\n\
1622given, only the first maxsplit occurrences are replaced.";
1623
1624static PyObject *
1625string_replace(self, args)
1626 PyStringObject *self;
1627 PyObject *args;
1628{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001629 const char *str = PyString_AS_STRING(self), *sub, *repl;
1630 char *new_s;
1631 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1632 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001634 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635
Guido van Rossum4c08d552000-03-10 22:55:18 +00001636 if (!PyArg_ParseTuple(args, "OO|i:replace",
1637 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001639
1640 if (PyString_Check(subobj)) {
1641 sub = PyString_AS_STRING(subobj);
1642 sub_len = PyString_GET_SIZE(subobj);
1643 }
1644 else if (PyUnicode_Check(subobj))
1645 return PyUnicode_Replace((PyObject *)self,
1646 subobj, replobj, count);
1647 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1648 return NULL;
1649
1650 if (PyString_Check(replobj)) {
1651 repl = PyString_AS_STRING(replobj);
1652 repl_len = PyString_GET_SIZE(replobj);
1653 }
1654 else if (PyUnicode_Check(replobj))
1655 return PyUnicode_Replace((PyObject *)self,
1656 subobj, replobj, count);
1657 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1658 return NULL;
1659
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001660 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001661 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662 return NULL;
1663 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001664 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665 if (new_s == NULL) {
1666 PyErr_NoMemory();
1667 return NULL;
1668 }
1669 if (out_len == -1) {
1670 /* we're returning another reference to self */
1671 new = (PyObject*)self;
1672 Py_INCREF(new);
1673 }
1674 else {
1675 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001676 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001677 }
1678 return new;
1679}
1680
1681
1682static char startswith__doc__[] =
1683"S.startswith(prefix[, start[, end]]) -> int\n\
1684\n\
1685Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1686optional start, test S beginning at that position. With optional end, stop\n\
1687comparing S at that position.";
1688
1689static PyObject *
1690string_startswith(self, args)
1691 PyStringObject *self;
1692 PyObject *args;
1693{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001694 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001696 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001697 int plen;
1698 int start = 0;
1699 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001700 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001701
Guido van Rossumc6821402000-05-08 14:08:05 +00001702 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1703 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001704 return NULL;
1705 if (PyString_Check(subobj)) {
1706 prefix = PyString_AS_STRING(subobj);
1707 plen = PyString_GET_SIZE(subobj);
1708 }
1709 else if (PyUnicode_Check(subobj))
1710 return PyInt_FromLong(
1711 PyUnicode_Tailmatch((PyObject *)self,
1712 subobj, start, end, -1));
1713 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714 return NULL;
1715
1716 /* adopt Java semantics for index out of range. it is legal for
1717 * offset to be == plen, but this only returns true if prefix is
1718 * the empty string.
1719 */
1720 if (start < 0 || start+plen > len)
1721 return PyInt_FromLong(0);
1722
1723 if (!memcmp(str+start, prefix, plen)) {
1724 /* did the match end after the specified end? */
1725 if (end < 0)
1726 return PyInt_FromLong(1);
1727 else if (end - start < plen)
1728 return PyInt_FromLong(0);
1729 else
1730 return PyInt_FromLong(1);
1731 }
1732 else return PyInt_FromLong(0);
1733}
1734
1735
1736static char endswith__doc__[] =
1737"S.endswith(suffix[, start[, end]]) -> int\n\
1738\n\
1739Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1740optional start, test S beginning at that position. With optional end, stop\n\
1741comparing S at that position.";
1742
1743static PyObject *
1744string_endswith(self, args)
1745 PyStringObject *self;
1746 PyObject *args;
1747{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750 const char* suffix;
1751 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752 int start = 0;
1753 int end = -1;
1754 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756
Guido van Rossumc6821402000-05-08 14:08:05 +00001757 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1758 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001759 return NULL;
1760 if (PyString_Check(subobj)) {
1761 suffix = PyString_AS_STRING(subobj);
1762 slen = PyString_GET_SIZE(subobj);
1763 }
1764 else if (PyUnicode_Check(subobj))
1765 return PyInt_FromLong(
1766 PyUnicode_Tailmatch((PyObject *)self,
1767 subobj, start, end, +1));
1768 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769 return NULL;
1770
Guido van Rossum4c08d552000-03-10 22:55:18 +00001771 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772 return PyInt_FromLong(0);
1773
1774 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001775 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776
Guido van Rossum4c08d552000-03-10 22:55:18 +00001777 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778 return PyInt_FromLong(1);
1779 else return PyInt_FromLong(0);
1780}
1781
1782
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001783static char encode__doc__[] =
1784"S.encode([encoding[,errors]]) -> string\n\
1785\n\
1786Return an encoded string version of S. Default encoding is the current\n\
1787default string encoding. errors may be given to set a different error\n\
1788handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1789a ValueError. Other possible values are 'ignore' and 'replace'.";
1790
1791static PyObject *
1792string_encode(PyStringObject *self, PyObject *args)
1793{
1794 char *encoding = NULL;
1795 char *errors = NULL;
1796 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1797 return NULL;
1798 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1799}
1800
1801
Guido van Rossum4c08d552000-03-10 22:55:18 +00001802static char expandtabs__doc__[] =
1803"S.expandtabs([tabsize]) -> string\n\
1804\n\
1805Return a copy of S where all tab characters are expanded using spaces.\n\
1806If tabsize is not given, a tab size of 8 characters is assumed.";
1807
1808static PyObject*
1809string_expandtabs(PyStringObject *self, PyObject *args)
1810{
1811 const char *e, *p;
1812 char *q;
1813 int i, j;
1814 PyObject *u;
1815 int tabsize = 8;
1816
1817 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1818 return NULL;
1819
1820 /* First pass: determine size of ouput string */
1821 i = j = 0;
1822 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1823 for (p = PyString_AS_STRING(self); p < e; p++)
1824 if (*p == '\t') {
1825 if (tabsize > 0)
1826 j += tabsize - (j % tabsize);
1827 }
1828 else {
1829 j++;
1830 if (*p == '\n' || *p == '\r') {
1831 i += j;
1832 j = 0;
1833 }
1834 }
1835
1836 /* Second pass: create output string and fill it */
1837 u = PyString_FromStringAndSize(NULL, i + j);
1838 if (!u)
1839 return NULL;
1840
1841 j = 0;
1842 q = PyString_AS_STRING(u);
1843
1844 for (p = PyString_AS_STRING(self); p < e; p++)
1845 if (*p == '\t') {
1846 if (tabsize > 0) {
1847 i = tabsize - (j % tabsize);
1848 j += i;
1849 while (i--)
1850 *q++ = ' ';
1851 }
1852 }
1853 else {
1854 j++;
1855 *q++ = *p;
1856 if (*p == '\n' || *p == '\r')
1857 j = 0;
1858 }
1859
1860 return u;
1861}
1862
1863static
1864PyObject *pad(PyStringObject *self,
1865 int left,
1866 int right,
1867 char fill)
1868{
1869 PyObject *u;
1870
1871 if (left < 0)
1872 left = 0;
1873 if (right < 0)
1874 right = 0;
1875
1876 if (left == 0 && right == 0) {
1877 Py_INCREF(self);
1878 return (PyObject *)self;
1879 }
1880
1881 u = PyString_FromStringAndSize(NULL,
1882 left + PyString_GET_SIZE(self) + right);
1883 if (u) {
1884 if (left)
1885 memset(PyString_AS_STRING(u), fill, left);
1886 memcpy(PyString_AS_STRING(u) + left,
1887 PyString_AS_STRING(self),
1888 PyString_GET_SIZE(self));
1889 if (right)
1890 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1891 fill, right);
1892 }
1893
1894 return u;
1895}
1896
1897static char ljust__doc__[] =
1898"S.ljust(width) -> string\n\
1899\n\
1900Return S left justified in a string of length width. Padding is\n\
1901done using spaces.";
1902
1903static PyObject *
1904string_ljust(PyStringObject *self, PyObject *args)
1905{
1906 int width;
1907 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1908 return NULL;
1909
1910 if (PyString_GET_SIZE(self) >= width) {
1911 Py_INCREF(self);
1912 return (PyObject*) self;
1913 }
1914
1915 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1916}
1917
1918
1919static char rjust__doc__[] =
1920"S.rjust(width) -> string\n\
1921\n\
1922Return S right justified in a string of length width. Padding is\n\
1923done using spaces.";
1924
1925static PyObject *
1926string_rjust(PyStringObject *self, PyObject *args)
1927{
1928 int width;
1929 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1930 return NULL;
1931
1932 if (PyString_GET_SIZE(self) >= width) {
1933 Py_INCREF(self);
1934 return (PyObject*) self;
1935 }
1936
1937 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1938}
1939
1940
1941static char center__doc__[] =
1942"S.center(width) -> string\n\
1943\n\
1944Return S centered in a string of length width. Padding is done\n\
1945using spaces.";
1946
1947static PyObject *
1948string_center(PyStringObject *self, PyObject *args)
1949{
1950 int marg, left;
1951 int width;
1952
1953 if (!PyArg_ParseTuple(args, "i:center", &width))
1954 return NULL;
1955
1956 if (PyString_GET_SIZE(self) >= width) {
1957 Py_INCREF(self);
1958 return (PyObject*) self;
1959 }
1960
1961 marg = width - PyString_GET_SIZE(self);
1962 left = marg / 2 + (marg & width & 1);
1963
1964 return pad(self, left, marg - left, ' ');
1965}
1966
#if 0
/* Disabled: S.zfill() is compiled out together with its entry in the
   method table. */
static char zfill__doc__[] =
	"S.zfill(width) -> string\n"
	"\n"
	"Pad a numeric string x with zeros on the left, to fill a field\n"
	"of the specified width. The string x is never truncated.";

static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int nzeros;
	PyObject *filled;
	char *text;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	nzeros = width - PyString_GET_SIZE(self);
	filled = pad(self, nzeros, 0, '0');
	if (filled == NULL)
		return NULL;

	/* text[nzeros] is the first character of the original string */
	text = PyString_AS_STRING(filled);
	if (text[nzeros] == '+' || text[nzeros] == '-') {
		/* move sign to beginning of string */
		text[0] = text[nzeros];
		text[nzeros] = '0';
	}

	return filled;
}
#endif
2006
2007static char isspace__doc__[] =
2008"S.isspace() -> int\n\
2009\n\
2010Return 1 if there are only whitespace characters in S,\n\
20110 otherwise.";
2012
2013static PyObject*
2014string_isspace(PyStringObject *self, PyObject *args)
2015{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002016 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2017 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002018
2019 if (!PyArg_NoArgs(args))
2020 return NULL;
2021
2022 /* Shortcut for single character strings */
2023 if (PyString_GET_SIZE(self) == 1 &&
2024 isspace(*p))
2025 return PyInt_FromLong(1);
2026
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002027 /* Special case for empty strings */
2028 if (PyString_GET_SIZE(self) == 0)
2029 return PyInt_FromLong(0);
2030
Guido van Rossum4c08d552000-03-10 22:55:18 +00002031 e = p + PyString_GET_SIZE(self);
2032 for (; p < e; p++) {
2033 if (!isspace(*p))
2034 return PyInt_FromLong(0);
2035 }
2036 return PyInt_FromLong(1);
2037}
2038
2039
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002040static char isalpha__doc__[] =
2041"S.isalpha() -> int\n\
2042\n\
2043Return 1 if all characters in S are alphabetic\n\
2044and there is at least one character in S, 0 otherwise.";
2045
2046static PyObject*
2047string_isalpha(PyUnicodeObject *self, PyObject *args)
2048{
2049 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2050 register const unsigned char *e;
2051
2052 if (!PyArg_NoArgs(args))
2053 return NULL;
2054
2055 /* Shortcut for single character strings */
2056 if (PyString_GET_SIZE(self) == 1 &&
2057 isalpha(*p))
2058 return PyInt_FromLong(1);
2059
2060 /* Special case for empty strings */
2061 if (PyString_GET_SIZE(self) == 0)
2062 return PyInt_FromLong(0);
2063
2064 e = p + PyString_GET_SIZE(self);
2065 for (; p < e; p++) {
2066 if (!isalpha(*p))
2067 return PyInt_FromLong(0);
2068 }
2069 return PyInt_FromLong(1);
2070}
2071
2072
2073static char isalnum__doc__[] =
2074"S.isalnum() -> int\n\
2075\n\
2076Return 1 if all characters in S are alphanumeric\n\
2077and there is at least one character in S, 0 otherwise.";
2078
2079static PyObject*
2080string_isalnum(PyUnicodeObject *self, PyObject *args)
2081{
2082 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2083 register const unsigned char *e;
2084
2085 if (!PyArg_NoArgs(args))
2086 return NULL;
2087
2088 /* Shortcut for single character strings */
2089 if (PyString_GET_SIZE(self) == 1 &&
2090 isalnum(*p))
2091 return PyInt_FromLong(1);
2092
2093 /* Special case for empty strings */
2094 if (PyString_GET_SIZE(self) == 0)
2095 return PyInt_FromLong(0);
2096
2097 e = p + PyString_GET_SIZE(self);
2098 for (; p < e; p++) {
2099 if (!isalnum(*p))
2100 return PyInt_FromLong(0);
2101 }
2102 return PyInt_FromLong(1);
2103}
2104
2105
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106static char isdigit__doc__[] =
2107"S.isdigit() -> int\n\
2108\n\
2109Return 1 if there are only digit characters in S,\n\
21100 otherwise.";
2111
2112static PyObject*
2113string_isdigit(PyStringObject *self, PyObject *args)
2114{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002115 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2116 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002117
2118 if (!PyArg_NoArgs(args))
2119 return NULL;
2120
2121 /* Shortcut for single character strings */
2122 if (PyString_GET_SIZE(self) == 1 &&
2123 isdigit(*p))
2124 return PyInt_FromLong(1);
2125
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002126 /* Special case for empty strings */
2127 if (PyString_GET_SIZE(self) == 0)
2128 return PyInt_FromLong(0);
2129
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130 e = p + PyString_GET_SIZE(self);
2131 for (; p < e; p++) {
2132 if (!isdigit(*p))
2133 return PyInt_FromLong(0);
2134 }
2135 return PyInt_FromLong(1);
2136}
2137
2138
2139static char islower__doc__[] =
2140"S.islower() -> int\n\
2141\n\
2142Return 1 if all cased characters in S are lowercase and there is\n\
2143at least one cased character in S, 0 otherwise.";
2144
2145static PyObject*
2146string_islower(PyStringObject *self, PyObject *args)
2147{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002148 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2149 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150 int cased;
2151
2152 if (!PyArg_NoArgs(args))
2153 return NULL;
2154
2155 /* Shortcut for single character strings */
2156 if (PyString_GET_SIZE(self) == 1)
2157 return PyInt_FromLong(islower(*p) != 0);
2158
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002159 /* Special case for empty strings */
2160 if (PyString_GET_SIZE(self) == 0)
2161 return PyInt_FromLong(0);
2162
Guido van Rossum4c08d552000-03-10 22:55:18 +00002163 e = p + PyString_GET_SIZE(self);
2164 cased = 0;
2165 for (; p < e; p++) {
2166 if (isupper(*p))
2167 return PyInt_FromLong(0);
2168 else if (!cased && islower(*p))
2169 cased = 1;
2170 }
2171 return PyInt_FromLong(cased);
2172}
2173
2174
2175static char isupper__doc__[] =
2176"S.isupper() -> int\n\
2177\n\
2178Return 1 if all cased characters in S are uppercase and there is\n\
2179at least one cased character in S, 0 otherwise.";
2180
2181static PyObject*
2182string_isupper(PyStringObject *self, PyObject *args)
2183{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002184 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2185 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186 int cased;
2187
2188 if (!PyArg_NoArgs(args))
2189 return NULL;
2190
2191 /* Shortcut for single character strings */
2192 if (PyString_GET_SIZE(self) == 1)
2193 return PyInt_FromLong(isupper(*p) != 0);
2194
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002195 /* Special case for empty strings */
2196 if (PyString_GET_SIZE(self) == 0)
2197 return PyInt_FromLong(0);
2198
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199 e = p + PyString_GET_SIZE(self);
2200 cased = 0;
2201 for (; p < e; p++) {
2202 if (islower(*p))
2203 return PyInt_FromLong(0);
2204 else if (!cased && isupper(*p))
2205 cased = 1;
2206 }
2207 return PyInt_FromLong(cased);
2208}
2209
2210
2211static char istitle__doc__[] =
2212"S.istitle() -> int\n\
2213\n\
2214Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2215may only follow uncased characters and lowercase characters only cased\n\
2216ones. Return 0 otherwise.";
2217
2218static PyObject*
2219string_istitle(PyStringObject *self, PyObject *args)
2220{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002221 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2222 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002223 int cased, previous_is_cased;
2224
2225 if (!PyArg_NoArgs(args))
2226 return NULL;
2227
2228 /* Shortcut for single character strings */
2229 if (PyString_GET_SIZE(self) == 1)
2230 return PyInt_FromLong(isupper(*p) != 0);
2231
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002232 /* Special case for empty strings */
2233 if (PyString_GET_SIZE(self) == 0)
2234 return PyInt_FromLong(0);
2235
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 e = p + PyString_GET_SIZE(self);
2237 cased = 0;
2238 previous_is_cased = 0;
2239 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002240 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241
2242 if (isupper(ch)) {
2243 if (previous_is_cased)
2244 return PyInt_FromLong(0);
2245 previous_is_cased = 1;
2246 cased = 1;
2247 }
2248 else if (islower(ch)) {
2249 if (!previous_is_cased)
2250 return PyInt_FromLong(0);
2251 previous_is_cased = 1;
2252 cased = 1;
2253 }
2254 else
2255 previous_is_cased = 0;
2256 }
2257 return PyInt_FromLong(cased);
2258}
2259
2260
2261static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002262"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002263\n\
2264Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002265Line breaks are not included in the resulting list unless keepends\n\
2266is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002267
2268#define SPLIT_APPEND(data, left, right) \
2269 str = PyString_FromStringAndSize(data + left, right - left); \
2270 if (!str) \
2271 goto onError; \
2272 if (PyList_Append(list, str)) { \
2273 Py_DECREF(str); \
2274 goto onError; \
2275 } \
2276 else \
2277 Py_DECREF(str);
2278
2279static PyObject*
2280string_splitlines(PyStringObject *self, PyObject *args)
2281{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002282 register int i;
2283 register int j;
2284 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002285 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 PyObject *list;
2287 PyObject *str;
2288 char *data;
2289
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002290 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002291 return NULL;
2292
2293 data = PyString_AS_STRING(self);
2294 len = PyString_GET_SIZE(self);
2295
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296 list = PyList_New(0);
2297 if (!list)
2298 goto onError;
2299
2300 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002301 int eol;
2302
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 /* Find a line and append it */
2304 while (i < len && data[i] != '\n' && data[i] != '\r')
2305 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306
2307 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002308 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 if (i < len) {
2310 if (data[i] == '\r' && i + 1 < len &&
2311 data[i+1] == '\n')
2312 i += 2;
2313 else
2314 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002315 if (keepends)
2316 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002318 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002319 j = i;
2320 }
2321 if (j < len) {
2322 SPLIT_APPEND(data, j, len);
2323 }
2324
2325 return list;
2326
2327 onError:
2328 Py_DECREF(list);
2329 return NULL;
2330}
2331
2332#undef SPLIT_APPEND
2333
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334
2335static PyMethodDef
2336string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 /* Counterparts of the obsolete stropmodule functions; except
2338 string.maketrans(). */
2339 {"join", (PyCFunction)string_join, 1, join__doc__},
2340 {"split", (PyCFunction)string_split, 1, split__doc__},
2341 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2342 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2343 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2344 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2345 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2346 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2347 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002348 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2349 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2351 {"count", (PyCFunction)string_count, 1, count__doc__},
2352 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2353 {"find", (PyCFunction)string_find, 1, find__doc__},
2354 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2357 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2358 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2359 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2361 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2362 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002363 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2364 {"title", (PyCFunction)string_title, 1, title__doc__},
2365 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2366 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2367 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002368 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2370 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2371#if 0
2372 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2373#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374 {NULL, NULL} /* sentinel */
2375};
2376
2377static PyObject *
2378string_getattr(s, name)
2379 PyStringObject *s;
2380 char *name;
2381{
2382 return Py_FindMethod(string_methods, (PyObject*)s, name);
2383}
2384
2385
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002386PyTypeObject PyString_Type = {
2387 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002388 0,
2389 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002390 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002391 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002392 (destructor)string_dealloc, /*tp_dealloc*/
2393 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002395 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002396 (cmpfunc)string_compare, /*tp_compare*/
2397 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002398 0, /*tp_as_number*/
2399 &string_as_sequence, /*tp_as_sequence*/
2400 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002401 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002402 0, /*tp_call*/
2403 0, /*tp_str*/
2404 0, /*tp_getattro*/
2405 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002406 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002407 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002408 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002409};
2410
2411void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002412PyString_Concat(pv, w)
2413 register PyObject **pv;
2414 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002415{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002416 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002417 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002418 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002419 if (w == NULL || !PyString_Check(*pv)) {
2420 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002421 *pv = NULL;
2422 return;
2423 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002424 v = string_concat((PyStringObject *) *pv, w);
2425 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002426 *pv = v;
2427}
2428
Guido van Rossum013142a1994-08-30 08:19:36 +00002429void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002430PyString_ConcatAndDel(pv, w)
2431 register PyObject **pv;
2432 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002433{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002434 PyString_Concat(pv, w);
2435 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002436}
2437
2438
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002439/* The following function breaks the notion that strings are immutable:
2440 it changes the size of a string. We get away with this only if there
2441 is only one module referencing the object. You can also think of it
2442 as creating a new string object and destroying the old one, only
2443 more efficiently. In any case, don't use this if the string may
2444 already be known to some other part of the code... */
2445
2446int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002447_PyString_Resize(pv, newsize)
2448 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002449 int newsize;
2450{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002451 register PyObject *v;
2452 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002453 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002454 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002455 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002456 Py_DECREF(v);
2457 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002458 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002459 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002460 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002461#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002462 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002463#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002464 _Py_ForgetReference(v);
2465 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002466 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002467 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002468 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002469 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002470 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002471 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002472 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002473 _Py_NewReference(*pv);
2474 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002475 sv->ob_size = newsize;
2476 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002477 return 0;
2478}
Guido van Rossume5372401993-03-16 12:15:04 +00002479
2480/* Helpers for formatstring */
2481
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002482static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002483getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002484 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002485 int arglen;
2486 int *p_argidx;
2487{
2488 int argidx = *p_argidx;
2489 if (argidx < arglen) {
2490 (*p_argidx)++;
2491 if (arglen < 0)
2492 return args;
2493 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002494 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002495 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002496 PyErr_SetString(PyExc_TypeError,
2497 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002498 return NULL;
2499}
2500
/* Bit flags describing a format specifier.  F_ALT selects the "#"
   variant in formatfloat() and formatint().  NOTE(review): the other
   flags presumably mirror the printf flag characters suggested by
   their names -- confirm against PyString_Format(). */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2506
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002507static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002508formatfloat(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002509 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002510 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002511 int flags;
2512 int prec;
2513 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002514 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002515{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002516 /* fmt = '%#.' + `prec` + `type`
2517 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002518 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002519 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002520 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002521 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002522 if (prec < 0)
2523 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002524 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2525 type = 'g';
2526 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002527 /* worst case length calc to ensure no buffer overrun:
2528 fmt = %#.<prec>g
2529 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2530 for any double rep.)
2531 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2532 If prec=0 the effective precision is 1 (the leading digit is
2533 always given), therefore increase by one to 10+prec. */
2534 if (buflen <= (size_t)10 + (size_t)prec) {
2535 PyErr_SetString(PyExc_OverflowError,
2536 "formatted float is too long (precision too long?)");
2537 return -1;
2538 }
Guido van Rossume5372401993-03-16 12:15:04 +00002539 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002540 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002541}
2542
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002543static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002544formatint(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002545 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002546 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002547 int flags;
2548 int prec;
2549 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002550 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002551{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002552 /* fmt = '%#.' + `prec` + 'l' + `type`
2553 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002554 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002555 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002556 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002557 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002558 if (prec < 0)
2559 prec = 1;
2560 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002561 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2562 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2563 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2564 PyErr_SetString(PyExc_OverflowError,
2565 "formatted integer is too long (precision too long?)");
2566 return -1;
2567 }
Guido van Rossume5372401993-03-16 12:15:04 +00002568 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002569 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002570}
2571
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002572static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002573formatchar(buf, buflen, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002574 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002575 size_t buflen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002576 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002577{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002578 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002579 if (PyString_Check(v)) {
2580 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002581 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002582 }
2583 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002584 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002585 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002586 }
2587 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002588 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002589}
2590
Guido van Rossum013142a1994-08-30 08:19:36 +00002591
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand in any expression (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002601
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002602PyObject *
2603PyString_Format(format, args)
2604 PyObject *format;
2605 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002606{
2607 char *fmt, *res;
2608 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002609 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002610 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002611 PyObject *dict = NULL;
2612 if (format == NULL || !PyString_Check(format) || args == NULL) {
2613 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002614 return NULL;
2615 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002616 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002617 fmt = PyString_AsString(format);
2618 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002619 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002620 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002621 if (result == NULL)
2622 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002623 res = PyString_AsString(result);
2624 if (PyTuple_Check(args)) {
2625 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002626 argidx = 0;
2627 }
2628 else {
2629 arglen = -1;
2630 argidx = -2;
2631 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002632 if (args->ob_type->tp_as_mapping)
2633 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002634 while (--fmtcnt >= 0) {
2635 if (*fmt != '%') {
2636 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002637 rescnt = fmtcnt + 100;
2638 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002639 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002640 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002641 res = PyString_AsString(result)
2642 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002643 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002644 }
2645 *res++ = *fmt++;
2646 }
2647 else {
2648 /* Got a format specifier */
2649 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002650 int width = -1;
2651 int prec = -1;
2652 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002653 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002654 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002655 PyObject *v = NULL;
2656 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002657 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002658 int sign;
2659 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002660 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002661 char *fmt_start = fmt;
2662
Guido van Rossumda9c2711996-12-05 21:58:58 +00002663 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002664 if (*fmt == '(') {
2665 char *keystart;
2666 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002667 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002668 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002669
2670 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002671 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002672 "format requires a mapping");
2673 goto error;
2674 }
2675 ++fmt;
2676 --fmtcnt;
2677 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002678 /* Skip over balanced parentheses */
2679 while (pcount > 0 && --fmtcnt >= 0) {
2680 if (*fmt == ')')
2681 --pcount;
2682 else if (*fmt == '(')
2683 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002684 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002685 }
2686 keylen = fmt - keystart - 1;
2687 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002688 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002689 "incomplete format key");
2690 goto error;
2691 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002692 key = PyString_FromStringAndSize(keystart,
2693 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002694 if (key == NULL)
2695 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002696 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002697 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002698 args_owned = 0;
2699 }
2700 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002701 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002702 if (args == NULL) {
2703 goto error;
2704 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002705 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002706 arglen = -1;
2707 argidx = -2;
2708 }
Guido van Rossume5372401993-03-16 12:15:04 +00002709 while (--fmtcnt >= 0) {
2710 switch (c = *fmt++) {
2711 case '-': flags |= F_LJUST; continue;
2712 case '+': flags |= F_SIGN; continue;
2713 case ' ': flags |= F_BLANK; continue;
2714 case '#': flags |= F_ALT; continue;
2715 case '0': flags |= F_ZERO; continue;
2716 }
2717 break;
2718 }
2719 if (c == '*') {
2720 v = getnextarg(args, arglen, &argidx);
2721 if (v == NULL)
2722 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002723 if (!PyInt_Check(v)) {
2724 PyErr_SetString(PyExc_TypeError,
2725 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002726 goto error;
2727 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002728 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002729 if (width < 0) {
2730 flags |= F_LJUST;
2731 width = -width;
2732 }
Guido van Rossume5372401993-03-16 12:15:04 +00002733 if (--fmtcnt >= 0)
2734 c = *fmt++;
2735 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002736 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002737 width = c - '0';
2738 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002739 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002740 if (!isdigit(c))
2741 break;
2742 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002743 PyErr_SetString(
2744 PyExc_ValueError,
2745 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002746 goto error;
2747 }
2748 width = width*10 + (c - '0');
2749 }
2750 }
2751 if (c == '.') {
2752 prec = 0;
2753 if (--fmtcnt >= 0)
2754 c = *fmt++;
2755 if (c == '*') {
2756 v = getnextarg(args, arglen, &argidx);
2757 if (v == NULL)
2758 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002759 if (!PyInt_Check(v)) {
2760 PyErr_SetString(
2761 PyExc_TypeError,
2762 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002763 goto error;
2764 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002765 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002766 if (prec < 0)
2767 prec = 0;
2768 if (--fmtcnt >= 0)
2769 c = *fmt++;
2770 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002771 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002772 prec = c - '0';
2773 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002774 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002775 if (!isdigit(c))
2776 break;
2777 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002778 PyErr_SetString(
2779 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002780 "prec too big");
2781 goto error;
2782 }
2783 prec = prec*10 + (c - '0');
2784 }
2785 }
2786 } /* prec */
2787 if (fmtcnt >= 0) {
2788 if (c == 'h' || c == 'l' || c == 'L') {
2789 size = c;
2790 if (--fmtcnt >= 0)
2791 c = *fmt++;
2792 }
2793 }
2794 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002795 PyErr_SetString(PyExc_ValueError,
2796 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002797 goto error;
2798 }
2799 if (c != '%') {
2800 v = getnextarg(args, arglen, &argidx);
2801 if (v == NULL)
2802 goto error;
2803 }
2804 sign = 0;
2805 fill = ' ';
2806 switch (c) {
2807 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002808 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002809 len = 1;
2810 break;
2811 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002812 case 'r':
2813 if (PyUnicode_Check(v)) {
2814 fmt = fmt_start;
2815 goto unicode;
2816 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002817 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002818 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002819 else
2820 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002821 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002822 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002823 if (!PyString_Check(temp)) {
2824 PyErr_SetString(PyExc_TypeError,
2825 "%s argument has non-string str()");
2826 goto error;
2827 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002828 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002829 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002830 if (prec >= 0 && len > prec)
2831 len = prec;
2832 break;
2833 case 'i':
2834 case 'd':
2835 case 'u':
2836 case 'o':
2837 case 'x':
2838 case 'X':
2839 if (c == 'i')
2840 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002841 pbuf = formatbuf;
2842 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002843 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002844 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002845 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002846 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002847 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002848 if ((flags&F_ALT) &&
2849 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002850 pbuf[0] == '0' && pbuf[1] == c) {
2851 *res++ = *pbuf++;
2852 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002853 rescnt -= 2;
2854 len -= 2;
2855 width -= 2;
2856 if (width < 0)
2857 width = 0;
2858 }
2859 }
Guido van Rossume5372401993-03-16 12:15:04 +00002860 break;
2861 case 'e':
2862 case 'E':
2863 case 'f':
2864 case 'g':
2865 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002866 pbuf = formatbuf;
2867 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002868 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002869 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002870 sign = 1;
2871 if (flags&F_ZERO)
2872 fill = '0';
2873 break;
2874 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002875 pbuf = formatbuf;
2876 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002877 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002878 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002879 break;
2880 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002881 PyErr_Format(PyExc_ValueError,
2882 "unsupported format character '%c' (0x%x)",
2883 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002884 goto error;
2885 }
2886 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002887 if (*pbuf == '-' || *pbuf == '+') {
2888 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002889 len--;
2890 }
2891 else if (flags & F_SIGN)
2892 sign = '+';
2893 else if (flags & F_BLANK)
2894 sign = ' ';
2895 else
2896 sign = '\0';
2897 }
2898 if (width < len)
2899 width = len;
2900 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002901 reslen -= rescnt;
2902 rescnt = width + fmtcnt + 100;
2903 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002904 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002905 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002906 res = PyString_AsString(result)
2907 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002908 }
2909 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002910 if (fill != ' ')
2911 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002912 rescnt--;
2913 if (width > len)
2914 width--;
2915 }
2916 if (width > len && !(flags&F_LJUST)) {
2917 do {
2918 --rescnt;
2919 *res++ = fill;
2920 } while (--width > len);
2921 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002922 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002923 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002924 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002925 res += len;
2926 rescnt -= len;
2927 while (--width >= len) {
2928 --rescnt;
2929 *res++ = ' ';
2930 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002931 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002932 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002933 "not all arguments converted");
2934 goto error;
2935 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002936 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002937 } /* '%' */
2938 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002939 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002940 PyErr_SetString(PyExc_TypeError,
2941 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002942 goto error;
2943 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002944 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002945 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002946 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002947 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002948 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002949
2950 unicode:
2951 if (args_owned) {
2952 Py_DECREF(args);
2953 args_owned = 0;
2954 }
2955 /* Fiddle args right (remove the first argidx-1 arguments) */
2956 --argidx;
2957 if (PyTuple_Check(orig_args) && argidx > 0) {
2958 PyObject *v;
2959 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2960 v = PyTuple_New(n);
2961 if (v == NULL)
2962 goto error;
2963 while (--n >= 0) {
2964 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2965 Py_INCREF(w);
2966 PyTuple_SET_ITEM(v, n, w);
2967 }
2968 args = v;
2969 } else {
2970 Py_INCREF(orig_args);
2971 args = orig_args;
2972 }
2973 /* Paste rest of format string to what we have of the result
2974 string; we reuse result for this */
2975 rescnt = res - PyString_AS_STRING(result);
2976 fmtcnt = PyString_GET_SIZE(format) - \
2977 (fmt - PyString_AS_STRING(format));
2978 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2979 Py_DECREF(args);
2980 goto error;
2981 }
2982 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2983 format = result;
2984 /* Let Unicode do its magic */
2985 result = PyUnicode_Format(format, args);
2986 Py_DECREF(format);
2987 Py_DECREF(args);
2988 return result;
2989
Guido van Rossume5372401993-03-16 12:15:04 +00002990 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002991 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002992 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002993 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002994 }
Guido van Rossume5372401993-03-16 12:15:04 +00002995 return NULL;
2996}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002997
2998
2999#ifdef INTERN_STRINGS
3000
3001static PyObject *interned;
3002
3003void
3004PyString_InternInPlace(p)
3005 PyObject **p;
3006{
3007 register PyStringObject *s = (PyStringObject *)(*p);
3008 PyObject *t;
3009 if (s == NULL || !PyString_Check(s))
3010 Py_FatalError("PyString_InternInPlace: strings only please!");
3011 if ((t = s->ob_sinterned) != NULL) {
3012 if (t == (PyObject *)s)
3013 return;
3014 Py_INCREF(t);
3015 *p = t;
3016 Py_DECREF(s);
3017 return;
3018 }
3019 if (interned == NULL) {
3020 interned = PyDict_New();
3021 if (interned == NULL)
3022 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003023 }
3024 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3025 Py_INCREF(t);
3026 *p = s->ob_sinterned = t;
3027 Py_DECREF(s);
3028 return;
3029 }
3030 t = (PyObject *)s;
3031 if (PyDict_SetItem(interned, t, t) == 0) {
3032 s->ob_sinterned = t;
3033 return;
3034 }
3035 PyErr_Clear();
3036}
3037
3038
3039PyObject *
3040PyString_InternFromString(cp)
3041 const char *cp;
3042{
3043 PyObject *s = PyString_FromString(cp);
3044 if (s == NULL)
3045 return NULL;
3046 PyString_InternInPlace(&s);
3047 return s;
3048}
3049
3050#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003051
3052void
3053PyString_Fini()
3054{
3055 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003056 for (i = 0; i < UCHAR_MAX + 1; i++) {
3057 Py_XDECREF(characters[i]);
3058 characters[i] = NULL;
3059 }
3060#ifndef DONT_SHARE_SHORT_STRINGS
3061 Py_XDECREF(nullstring);
3062 nullstring = NULL;
3063#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003064#ifdef INTERN_STRINGS
3065 if (interned) {
3066 int pos, changed;
3067 PyObject *key, *value;
3068 do {
3069 changed = 0;
3070 pos = 0;
3071 while (PyDict_Next(interned, &pos, &key, &value)) {
3072 if (key->ob_refcnt == 2 && key == value) {
3073 PyDict_DelItem(interned, key);
3074 changed = 1;
3075 }
3076 }
3077 } while (changed);
3078 }
3079#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003080}