blob: 2667fa9e70f6dd94cf2b3c32e85a16af6b1143a2 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* String object implementation */
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000014
Guido van Rossum71160aa1997-06-03 18:03:18 +000015#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000016#include <ctype.h>
17
#ifdef COUNT_ALLOCS
/* Allocation statistics: bumped each time a request for an empty string
   (null_strings) or a one-character string (one_strings) is satisfied
   from an already existing shared object instead of a new allocation. */
int null_strings, one_strings;
#endif
21
Guido van Rossum03093a21994-09-28 15:51:32 +000022#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000023#include <limits.h>
24#else
25#ifndef UCHAR_MAX
26#define UCHAR_MAX 255
27#endif
28#endif
29
/* Cache of one-character string objects, indexed by the character value
   masked with UCHAR_MAX.  An entry stays NULL until a string for that
   character has been created. */
static PyStringObject *characters[UCHAR_MAX + 1];
#ifndef DONT_SHARE_SHORT_STRINGS
/* The shared empty string object, or NULL while none has been created. */
static PyStringObject *nullstring;
#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000034
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000050PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000051PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000052{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000071#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
73 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000074 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000075 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef CACHE_HASH
80 op->ob_shash = -1;
81#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000082#ifdef INTERN_STRINGS
83 op->ob_sinterned = NULL;
84#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 if (str != NULL)
86 memcpy(op->ob_sval, str, size);
87 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 if (size == 0) {
90 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
93 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000103 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000105 if (size > INT_MAX) {
106 PyErr_SetString(PyExc_OverflowError,
107 "string is too long for a Python string");
108 return NULL;
109 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000110#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000111 if (size == 0 && (op = nullstring) != NULL) {
112#ifdef COUNT_ALLOCS
113 null_strings++;
114#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000115 Py_INCREF(op);
116 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 }
118 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
119#ifdef COUNT_ALLOCS
120 one_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000125#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
127 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133#ifdef CACHE_HASH
134 op->ob_shash = -1;
135#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000136#ifdef INTERN_STRINGS
137 op->ob_sinterned = NULL;
138#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000140#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000148#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150}
151
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000152PyObject *PyString_Decode(const char *s,
153 int size,
154 const char *encoding,
155 const char *errors)
156{
157 PyObject *buffer = NULL, *str;
158
159 if (encoding == NULL)
160 encoding = PyUnicode_GetDefaultEncoding();
161
162 /* Decode via the codec registry */
163 buffer = PyBuffer_FromMemory((void *)s, size);
164 if (buffer == NULL)
165 goto onError;
166 str = PyCodec_Decode(buffer, encoding, errors);
167 if (str == NULL)
168 goto onError;
169 /* Convert Unicode to a string using the default encoding */
170 if (PyUnicode_Check(str)) {
171 PyObject *temp = str;
172 str = PyUnicode_AsEncodedString(str, NULL, NULL);
173 Py_DECREF(temp);
174 if (str == NULL)
175 goto onError;
176 }
177 if (!PyString_Check(str)) {
178 PyErr_Format(PyExc_TypeError,
179 "decoder did not return an string object (type=%.400s)",
180 str->ob_type->tp_name);
181 Py_DECREF(str);
182 goto onError;
183 }
184 Py_DECREF(buffer);
185 return str;
186
187 onError:
188 Py_XDECREF(buffer);
189 return NULL;
190}
191
192PyObject *PyString_Encode(const char *s,
193 int size,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v, *str;
198
199 str = PyString_FromStringAndSize(s, size);
200 if (str == NULL)
201 return NULL;
202 v = PyString_AsEncodedString(str, encoding, errors);
203 Py_DECREF(str);
204 return v;
205}
206
207PyObject *PyString_AsEncodedString(PyObject *str,
208 const char *encoding,
209 const char *errors)
210{
211 PyObject *v;
212
213 if (!PyString_Check(str)) {
214 PyErr_BadArgument();
215 goto onError;
216 }
217
218 if (encoding == NULL)
219 encoding = PyUnicode_GetDefaultEncoding();
220
221 /* Encode via the codec registry */
222 v = PyCodec_Encode(str, encoding, errors);
223 if (v == NULL)
224 goto onError;
225 /* Convert Unicode to a string using the default encoding */
226 if (PyUnicode_Check(v)) {
227 PyObject *temp = v;
228 v = PyUnicode_AsEncodedString(v, NULL, NULL);
229 Py_DECREF(temp);
230 if (v == NULL)
231 goto onError;
232 }
233 if (!PyString_Check(v)) {
234 PyErr_Format(PyExc_TypeError,
235 "encoder did not return a string object (type=%.400s)",
236 v->ob_type->tp_name);
237 Py_DECREF(v);
238 goto onError;
239 }
240 return v;
241
242 onError:
243 return NULL;
244}
245
Guido van Rossum234f9421993-06-17 12:35:49 +0000246static void
Fred Drakeba096332000-07-09 07:04:36 +0000247string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000248{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000249 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000250}
251
Guido van Rossumd7047b31995-01-02 19:07:15 +0000252int
Fred Drakeba096332000-07-09 07:04:36 +0000253PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000255 if (!PyString_Check(op)) {
256 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000257 return -1;
258 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000259 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000263PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000265 if (!PyString_Check(op)) {
266 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 return NULL;
268 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000269 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000270}
271
272/* Methods */
273
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000274static int
Fred Drakeba096332000-07-09 07:04:36 +0000275string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276{
277 int i;
278 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000279 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000280 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000283 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000285
286 /* figure out which quote to use; single is prefered */
287 quote = '\'';
288 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
289 quote = '"';
290
291 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292 for (i = 0; i < op->ob_size; i++) {
293 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000294 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000295 fprintf(fp, "\\%c", c);
296 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000297 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000299 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000301 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000302 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303}
304
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000305static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000306string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000308 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
309 PyObject *v;
310 if (newsize > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "string is too large to make repr");
313 }
314 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000317 }
318 else {
319 register int i;
320 register char c;
321 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000322 int quote;
323
324 /* figure out which quote to use; single is prefered */
325 quote = '\'';
326 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
327 quote = '"';
328
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000329 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000330 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000331 for (i = 0; i < op->ob_size; i++) {
332 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 *p++ = '\\', *p++ = c;
335 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 while (*p != '\0')
338 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339 }
340 else
341 *p++ = c;
342 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 _PyString_Resize(
346 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000347 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349}
350
351static int
Fred Drakeba096332000-07-09 07:04:36 +0000352string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000353{
354 return a->ob_size;
355}
356
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000357static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000358string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359{
360 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000361 register PyStringObject *op;
362 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000363 if (PyUnicode_Check(bb))
364 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000365 PyErr_Format(PyExc_TypeError,
366 "cannot add type \"%.200s\" to string",
367 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 return NULL;
369 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000370#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371 /* Optimize cases with empty left or right operand */
372 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000373 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000374 return bb;
375 }
376 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 Py_INCREF(a);
378 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 }
380 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000381 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000382 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000383 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000384 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000385 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000386 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000387#ifdef CACHE_HASH
388 op->ob_shash = -1;
389#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000390#ifdef INTERN_STRINGS
391 op->ob_sinterned = NULL;
392#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000393 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
394 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
395 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000396 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397#undef b
398}
399
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000400static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000401string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402{
403 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000404 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000405 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406 if (n < 0)
407 n = 0;
408 size = a->ob_size * n;
409 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000410 Py_INCREF(a);
411 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000413 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000414 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000415 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000416 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000418 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000419#ifdef CACHE_HASH
420 op->ob_shash = -1;
421#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000422#ifdef INTERN_STRINGS
423 op->ob_sinterned = NULL;
424#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000425 for (i = 0; i < size; i += a->ob_size)
426 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
427 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000429}
430
431/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
432
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000434string_slice(register PyStringObject *a, register int i, register int j)
435 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436{
437 if (i < 0)
438 i = 0;
439 if (j < 0)
440 j = 0; /* Avoid signed/unsigned bug in next line */
441 if (j > a->ob_size)
442 j = a->ob_size;
443 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000444 Py_INCREF(a);
445 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000446 }
447 if (j < i)
448 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000449 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450}
451
Guido van Rossum9284a572000-03-07 15:53:43 +0000452static int
Fred Drakeba096332000-07-09 07:04:36 +0000453string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000454{
455 register char *s, *end;
456 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000457 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000458 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000459 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000460 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000461 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000462 return -1;
463 }
464 c = PyString_AsString(el)[0];
465 s = PyString_AsString(a);
466 end = s + PyString_Size(a);
467 while (s < end) {
468 if (c == *s++)
469 return 1;
470 }
471 return 0;
472}
473
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000475string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000476{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000477 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000478 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000479 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000480 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 return NULL;
482 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000483 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000484 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000485#ifdef COUNT_ALLOCS
486 if (v != NULL)
487 one_strings++;
488#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000489 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000491 if (v == NULL)
492 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000493 characters[c] = (PyStringObject *) v;
494 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000495 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000496 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000497 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000498}
499
500static int
Fred Drakeba096332000-07-09 07:04:36 +0000501string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000502{
Guido van Rossum253919f1991-02-13 23:18:39 +0000503 int len_a = a->ob_size, len_b = b->ob_size;
504 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000505 int cmp;
506 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000507 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000508 if (cmp == 0)
509 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
510 if (cmp != 0)
511 return cmp;
512 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000513 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
Guido van Rossum9bfef441993-03-29 10:43:31 +0000516static long
Fred Drakeba096332000-07-09 07:04:36 +0000517string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000518{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000519 register int len;
520 register unsigned char *p;
521 register long x;
522
523#ifdef CACHE_HASH
524 if (a->ob_shash != -1)
525 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000526#ifdef INTERN_STRINGS
527 if (a->ob_sinterned != NULL)
528 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000529 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000530#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000531#endif
532 len = a->ob_size;
533 p = (unsigned char *) a->ob_sval;
534 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000535 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000536 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000537 x ^= a->ob_size;
538 if (x == -1)
539 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000540#ifdef CACHE_HASH
541 a->ob_shash = x;
542#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000543 return x;
544}
545
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000546static int
Fred Drakeba096332000-07-09 07:04:36 +0000547string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000548{
549 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000550 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000551 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000552 return -1;
553 }
554 *ptr = (void *)self->ob_sval;
555 return self->ob_size;
556}
557
558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000560{
Guido van Rossum045e6881997-09-08 18:30:11 +0000561 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000562 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000563 return -1;
564}
565
566static int
Fred Drakeba096332000-07-09 07:04:36 +0000567string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000568{
569 if ( lenp )
570 *lenp = self->ob_size;
571 return 1;
572}
573
Guido van Rossum1db70701998-10-08 02:18:52 +0000574static int
Fred Drakeba096332000-07-09 07:04:36 +0000575string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000576{
577 if ( index != 0 ) {
578 PyErr_SetString(PyExc_SystemError,
579 "accessing non-existent string segment");
580 return -1;
581 }
582 *ptr = self->ob_sval;
583 return self->ob_size;
584}
585
/* Sequence slot table for strings.  The two assignment slots are 0:
   no item or slice assignment function is provided. */
static PySequenceMethods string_as_sequence = {
    (inquiry)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (intargfunc)string_repeat, /*sq_repeat*/
    (intargfunc)string_item, /*sq_item*/
    (intintargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
596
/* Buffer slot table for strings: read and char access expose the
   string's own bytes as a single segment; the write slot always fails. */
static PyBufferProcs string_as_buffer = {
    (getreadbufferproc)string_buffer_getreadbuf,
    (getwritebufferproc)string_buffer_getwritebuf,
    (getsegcountproc)string_buffer_getsegcount,
    (getcharbufferproc)string_buffer_getcharbuf,
};
603
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000604
605
/* Strip-mode selectors.  NOTE(review): presumably consumed by the strip
   routines further down the file, which are not visible here -- confirm. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
609
610
611static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000612split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000613{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000614 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000615 PyObject* item;
616 PyObject *list = PyList_New(0);
617
618 if (list == NULL)
619 return NULL;
620
Guido van Rossum4c08d552000-03-10 22:55:18 +0000621 for (i = j = 0; i < len; ) {
622 while (i < len && isspace(Py_CHARMASK(s[i])))
623 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000624 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 while (i < len && !isspace(Py_CHARMASK(s[i])))
626 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000627 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000628 if (maxsplit-- <= 0)
629 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 item = PyString_FromStringAndSize(s+j, (int)(i-j));
631 if (item == NULL)
632 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000633 err = PyList_Append(list, item);
634 Py_DECREF(item);
635 if (err < 0)
636 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000637 while (i < len && isspace(Py_CHARMASK(s[i])))
638 i++;
639 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000640 }
641 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000642 if (j < len) {
643 item = PyString_FromStringAndSize(s+j, (int)(len - j));
644 if (item == NULL)
645 goto finally;
646 err = PyList_Append(list, item);
647 Py_DECREF(item);
648 if (err < 0)
649 goto finally;
650 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000651 return list;
652 finally:
653 Py_DECREF(list);
654 return NULL;
655}
656
657
658static char split__doc__[] =
659"S.split([sep [,maxsplit]]) -> list of strings\n\
660\n\
661Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000662delimiter string. If maxsplit is given, at most maxsplit\n\
663splits are done. If sep is not specified, any whitespace string\n\
664is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665
666static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000667string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000668{
669 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000670 int maxsplit = -1;
671 const char *s = PyString_AS_STRING(self), *sub;
672 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000673
Guido van Rossum4c08d552000-03-10 22:55:18 +0000674 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000675 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000676 if (maxsplit < 0)
677 maxsplit = INT_MAX;
678 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000679 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000680 if (PyString_Check(subobj)) {
681 sub = PyString_AS_STRING(subobj);
682 n = PyString_GET_SIZE(subobj);
683 }
684 else if (PyUnicode_Check(subobj))
685 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
686 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
687 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688 if (n == 0) {
689 PyErr_SetString(PyExc_ValueError, "empty separator");
690 return NULL;
691 }
692
693 list = PyList_New(0);
694 if (list == NULL)
695 return NULL;
696
697 i = j = 0;
698 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000699 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (maxsplit-- <= 0)
701 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000702 item = PyString_FromStringAndSize(s+j, (int)(i-j));
703 if (item == NULL)
704 goto fail;
705 err = PyList_Append(list, item);
706 Py_DECREF(item);
707 if (err < 0)
708 goto fail;
709 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000710 }
711 else
712 i++;
713 }
714 item = PyString_FromStringAndSize(s+j, (int)(len-j));
715 if (item == NULL)
716 goto fail;
717 err = PyList_Append(list, item);
718 Py_DECREF(item);
719 if (err < 0)
720 goto fail;
721
722 return list;
723
724 fail:
725 Py_DECREF(list);
726 return NULL;
727}
728
729
730static char join__doc__[] =
731"S.join(sequence) -> string\n\
732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000733Return a string which is the concatenation of the strings in the\n\
734sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000735
736static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000737string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000738{
739 char *sep = PyString_AS_STRING(self);
740 int seplen = PyString_GET_SIZE(self);
741 PyObject *res = NULL;
742 int reslen = 0;
743 char *p;
744 int seqlen = 0;
745 int sz = 100;
746 int i, slen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000747 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000748
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000749 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000750 return NULL;
751
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000752 seq = PySequence_Fast(orig, "");
753 if (seq == NULL) {
754 if (PyErr_ExceptionMatches(PyExc_TypeError))
755 PyErr_Format(PyExc_TypeError,
756 "sequence expected, %.80s found",
757 orig->ob_type->tp_name);
758 return NULL;
759 }
760
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000761 seqlen = PySequence_Length(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000763 item = PySequence_Fast_GET_ITEM(seq, 0);
764 Py_INCREF(item);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000765 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000766 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000767
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000768 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
769 return NULL;
770 p = PyString_AsString(res);
771
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000772 for (i = 0; i < seqlen; i++) {
773 item = PySequence_Fast_GET_ITEM(seq, i);
774 if (!PyString_Check(item)){
775 if (PyUnicode_Check(item)) {
776 Py_DECREF(res);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000777 return PyUnicode_Join((PyObject *)self,
778 seq);
779 }
780 PyErr_Format(PyExc_TypeError,
781 "sequence item %i: expected string, %.80s found",
782 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000783 goto finally;
784 }
785 slen = PyString_GET_SIZE(item);
786 while (reslen + slen + seplen >= sz) {
787 if (_PyString_Resize(&res, sz*2)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000788 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000789 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000790 sz *= 2;
791 p = PyString_AsString(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000792 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000793 if (i > 0) {
794 memcpy(p, sep, seplen);
795 p += seplen;
796 reslen += seplen;
797 }
798 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000799 p += slen;
800 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000801 }
802 if (_PyString_Resize(&res, reslen))
803 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000804 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000805 return res;
806
807 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000808 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000809 Py_DECREF(res);
810 return NULL;
811}
812
813
814
815static long
Fred Drakeba096332000-07-09 07:04:36 +0000816string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000817{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000818 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000819 int len = PyString_GET_SIZE(self);
820 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000821 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000822
Guido van Rossumc6821402000-05-08 14:08:05 +0000823 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
824 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000825 return -2;
826 if (PyString_Check(subobj)) {
827 sub = PyString_AS_STRING(subobj);
828 n = PyString_GET_SIZE(subobj);
829 }
830 else if (PyUnicode_Check(subobj))
831 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
832 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000833 return -2;
834
835 if (last > len)
836 last = len;
837 if (last < 0)
838 last += len;
839 if (last < 0)
840 last = 0;
841 if (i < 0)
842 i += len;
843 if (i < 0)
844 i = 0;
845
Guido van Rossum4c08d552000-03-10 22:55:18 +0000846 if (dir > 0) {
847 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000848 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000849 last -= n;
850 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000851 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000852 return (long)i;
853 }
854 else {
855 int j;
856
857 if (n == 0 && i <= last)
858 return (long)last;
859 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000860 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000861 return (long)j;
862 }
863
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000864 return -1;
865}
866
867
868static char find__doc__[] =
869"S.find(sub [,start [,end]]) -> int\n\
870\n\
871Return the lowest index in S where substring sub is found,\n\
872such that sub is contained within s[start,end]. Optional\n\
873arguments start and end are interpreted as in slice notation.\n\
874\n\
875Return -1 on failure.";
876
877static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000878string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000879{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000880 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000881 if (result == -2)
882 return NULL;
883 return PyInt_FromLong(result);
884}
885
886
887static char index__doc__[] =
888"S.index(sub [,start [,end]]) -> int\n\
889\n\
890Like S.find() but raise ValueError when the substring is not found.";
891
892static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000893string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000894{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000895 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000896 if (result == -2)
897 return NULL;
898 if (result == -1) {
899 PyErr_SetString(PyExc_ValueError,
900 "substring not found in string.index");
901 return NULL;
902 }
903 return PyInt_FromLong(result);
904}
905
906
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000907static char rfind__doc__[] =
908"S.rfind(sub [,start [,end]]) -> int\n\
909\n\
910Return the highest index in S where substring sub is found,\n\
911such that sub is contained within s[start,end]. Optional\n\
912arguments start and end are interpreted as in slice notation.\n\
913\n\
914Return -1 on failure.";
915
916static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000917string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000918{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000919 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000920 if (result == -2)
921 return NULL;
922 return PyInt_FromLong(result);
923}
924
925
926static char rindex__doc__[] =
927"S.rindex(sub [,start [,end]]) -> int\n\
928\n\
929Like S.rfind() but raise ValueError when the substring is not found.";
930
931static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000932string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000933{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000934 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000935 if (result == -2)
936 return NULL;
937 if (result == -1) {
938 PyErr_SetString(PyExc_ValueError,
939 "substring not found in string.rindex");
940 return NULL;
941 }
942 return PyInt_FromLong(result);
943}
944
945
946static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000947do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000948{
949 char *s = PyString_AS_STRING(self);
950 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000951
Guido van Rossum43713e52000-02-29 13:59:29 +0000952 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000953 return NULL;
954
955 i = 0;
956 if (striptype != RIGHTSTRIP) {
957 while (i < len && isspace(Py_CHARMASK(s[i]))) {
958 i++;
959 }
960 }
961
962 j = len;
963 if (striptype != LEFTSTRIP) {
964 do {
965 j--;
966 } while (j >= i && isspace(Py_CHARMASK(s[j])));
967 j++;
968 }
969
970 if (i == 0 && j == len) {
971 Py_INCREF(self);
972 return (PyObject*)self;
973 }
974 else
975 return PyString_FromStringAndSize(s+i, j-i);
976}
977
978
979static char strip__doc__[] =
980"S.strip() -> string\n\
981\n\
982Return a copy of the string S with leading and trailing\n\
983whitespace removed.";
984
985static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000986string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000987{
988 return do_strip(self, args, BOTHSTRIP);
989}
990
991
992static char lstrip__doc__[] =
993"S.lstrip() -> string\n\
994\n\
995Return a copy of the string S with leading whitespace removed.";
996
997static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000998string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000999{
1000 return do_strip(self, args, LEFTSTRIP);
1001}
1002
1003
1004static char rstrip__doc__[] =
1005"S.rstrip() -> string\n\
1006\n\
1007Return a copy of the string S with trailing whitespace removed.";
1008
1009static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001010string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001011{
1012 return do_strip(self, args, RIGHTSTRIP);
1013}
1014
1015
1016static char lower__doc__[] =
1017"S.lower() -> string\n\
1018\n\
1019Return a copy of the string S converted to lowercase.";
1020
1021static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001022string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001023{
1024 char *s = PyString_AS_STRING(self), *s_new;
1025 int i, n = PyString_GET_SIZE(self);
1026 PyObject *new;
1027
Guido van Rossum43713e52000-02-29 13:59:29 +00001028 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001029 return NULL;
1030 new = PyString_FromStringAndSize(NULL, n);
1031 if (new == NULL)
1032 return NULL;
1033 s_new = PyString_AsString(new);
1034 for (i = 0; i < n; i++) {
1035 int c = Py_CHARMASK(*s++);
1036 if (isupper(c)) {
1037 *s_new = tolower(c);
1038 } else
1039 *s_new = c;
1040 s_new++;
1041 }
1042 return new;
1043}
1044
1045
1046static char upper__doc__[] =
1047"S.upper() -> string\n\
1048\n\
1049Return a copy of the string S converted to uppercase.";
1050
1051static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001052string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001053{
1054 char *s = PyString_AS_STRING(self), *s_new;
1055 int i, n = PyString_GET_SIZE(self);
1056 PyObject *new;
1057
Guido van Rossum43713e52000-02-29 13:59:29 +00001058 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001059 return NULL;
1060 new = PyString_FromStringAndSize(NULL, n);
1061 if (new == NULL)
1062 return NULL;
1063 s_new = PyString_AsString(new);
1064 for (i = 0; i < n; i++) {
1065 int c = Py_CHARMASK(*s++);
1066 if (islower(c)) {
1067 *s_new = toupper(c);
1068 } else
1069 *s_new = c;
1070 s_new++;
1071 }
1072 return new;
1073}
1074
1075
Guido van Rossum4c08d552000-03-10 22:55:18 +00001076static char title__doc__[] =
1077"S.title() -> string\n\
1078\n\
1079Return a titlecased version of S, i.e. words start with uppercase\n\
1080characters, all remaining cased characters have lowercase.";
1081
1082static PyObject*
1083string_title(PyUnicodeObject *self, PyObject *args)
1084{
1085 char *s = PyString_AS_STRING(self), *s_new;
1086 int i, n = PyString_GET_SIZE(self);
1087 int previous_is_cased = 0;
1088 PyObject *new;
1089
1090 if (!PyArg_ParseTuple(args, ":title"))
1091 return NULL;
1092 new = PyString_FromStringAndSize(NULL, n);
1093 if (new == NULL)
1094 return NULL;
1095 s_new = PyString_AsString(new);
1096 for (i = 0; i < n; i++) {
1097 int c = Py_CHARMASK(*s++);
1098 if (islower(c)) {
1099 if (!previous_is_cased)
1100 c = toupper(c);
1101 previous_is_cased = 1;
1102 } else if (isupper(c)) {
1103 if (previous_is_cased)
1104 c = tolower(c);
1105 previous_is_cased = 1;
1106 } else
1107 previous_is_cased = 0;
1108 *s_new++ = c;
1109 }
1110 return new;
1111}
1112
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113static char capitalize__doc__[] =
1114"S.capitalize() -> string\n\
1115\n\
1116Return a copy of the string S with only its first character\n\
1117capitalized.";
1118
1119static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001120string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001121{
1122 char *s = PyString_AS_STRING(self), *s_new;
1123 int i, n = PyString_GET_SIZE(self);
1124 PyObject *new;
1125
Guido van Rossum43713e52000-02-29 13:59:29 +00001126 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001127 return NULL;
1128 new = PyString_FromStringAndSize(NULL, n);
1129 if (new == NULL)
1130 return NULL;
1131 s_new = PyString_AsString(new);
1132 if (0 < n) {
1133 int c = Py_CHARMASK(*s++);
1134 if (islower(c))
1135 *s_new = toupper(c);
1136 else
1137 *s_new = c;
1138 s_new++;
1139 }
1140 for (i = 1; i < n; i++) {
1141 int c = Py_CHARMASK(*s++);
1142 if (isupper(c))
1143 *s_new = tolower(c);
1144 else
1145 *s_new = c;
1146 s_new++;
1147 }
1148 return new;
1149}
1150
1151
1152static char count__doc__[] =
1153"S.count(sub[, start[, end]]) -> int\n\
1154\n\
1155Return the number of occurrences of substring sub in string\n\
1156S[start:end]. Optional arguments start and end are\n\
1157interpreted as in slice notation.";
1158
1159static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001160string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001161{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001162 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001163 int len = PyString_GET_SIZE(self), n;
1164 int i = 0, last = INT_MAX;
1165 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001166 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001167
Guido van Rossumc6821402000-05-08 14:08:05 +00001168 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1169 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001170 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001171
Guido van Rossum4c08d552000-03-10 22:55:18 +00001172 if (PyString_Check(subobj)) {
1173 sub = PyString_AS_STRING(subobj);
1174 n = PyString_GET_SIZE(subobj);
1175 }
1176 else if (PyUnicode_Check(subobj))
1177 return PyInt_FromLong(
1178 PyUnicode_Count((PyObject *)self, subobj, i, last));
1179 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1180 return NULL;
1181
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001182 if (last > len)
1183 last = len;
1184 if (last < 0)
1185 last += len;
1186 if (last < 0)
1187 last = 0;
1188 if (i < 0)
1189 i += len;
1190 if (i < 0)
1191 i = 0;
1192 m = last + 1 - n;
1193 if (n == 0)
1194 return PyInt_FromLong((long) (m-i));
1195
1196 r = 0;
1197 while (i < m) {
1198 if (!memcmp(s+i, sub, n)) {
1199 r++;
1200 i += n;
1201 } else {
1202 i++;
1203 }
1204 }
1205 return PyInt_FromLong((long) r);
1206}
1207
1208
1209static char swapcase__doc__[] =
1210"S.swapcase() -> string\n\
1211\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001212Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213converted to lowercase and vice versa.";
1214
1215static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001216string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001217{
1218 char *s = PyString_AS_STRING(self), *s_new;
1219 int i, n = PyString_GET_SIZE(self);
1220 PyObject *new;
1221
Guido van Rossum43713e52000-02-29 13:59:29 +00001222 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001223 return NULL;
1224 new = PyString_FromStringAndSize(NULL, n);
1225 if (new == NULL)
1226 return NULL;
1227 s_new = PyString_AsString(new);
1228 for (i = 0; i < n; i++) {
1229 int c = Py_CHARMASK(*s++);
1230 if (islower(c)) {
1231 *s_new = toupper(c);
1232 }
1233 else if (isupper(c)) {
1234 *s_new = tolower(c);
1235 }
1236 else
1237 *s_new = c;
1238 s_new++;
1239 }
1240 return new;
1241}
1242
1243
1244static char translate__doc__[] =
1245"S.translate(table [,deletechars]) -> string\n\
1246\n\
1247Return a copy of the string S, where all characters occurring\n\
1248in the optional argument deletechars are removed, and the\n\
1249remaining characters have been mapped through the given\n\
1250translation table, which must be a string of length 256.";
1251
1252static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001253string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001254{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001255 register char *input, *output;
1256 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001257 register int i, c, changed = 0;
1258 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001259 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260 int inlen, tablen, dellen = 0;
1261 PyObject *result;
1262 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001263 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001264
Guido van Rossum4c08d552000-03-10 22:55:18 +00001265 if (!PyArg_ParseTuple(args, "O|O:translate",
1266 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001267 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001268
1269 if (PyString_Check(tableobj)) {
1270 table1 = PyString_AS_STRING(tableobj);
1271 tablen = PyString_GET_SIZE(tableobj);
1272 }
1273 else if (PyUnicode_Check(tableobj)) {
1274 /* Unicode .translate() does not support the deletechars
1275 parameter; instead a mapping to None will cause characters
1276 to be deleted. */
1277 if (delobj != NULL) {
1278 PyErr_SetString(PyExc_TypeError,
1279 "deletions are implemented differently for unicode");
1280 return NULL;
1281 }
1282 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1283 }
1284 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001285 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286
1287 if (delobj != NULL) {
1288 if (PyString_Check(delobj)) {
1289 del_table = PyString_AS_STRING(delobj);
1290 dellen = PyString_GET_SIZE(delobj);
1291 }
1292 else if (PyUnicode_Check(delobj)) {
1293 PyErr_SetString(PyExc_TypeError,
1294 "deletions are implemented differently for unicode");
1295 return NULL;
1296 }
1297 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1298 return NULL;
1299
1300 if (tablen != 256) {
1301 PyErr_SetString(PyExc_ValueError,
1302 "translation table must be 256 characters long");
1303 return NULL;
1304 }
1305 }
1306 else {
1307 del_table = NULL;
1308 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309 }
1310
1311 table = table1;
1312 inlen = PyString_Size(input_obj);
1313 result = PyString_FromStringAndSize((char *)NULL, inlen);
1314 if (result == NULL)
1315 return NULL;
1316 output_start = output = PyString_AsString(result);
1317 input = PyString_AsString(input_obj);
1318
1319 if (dellen == 0) {
1320 /* If no deletions are required, use faster code */
1321 for (i = inlen; --i >= 0; ) {
1322 c = Py_CHARMASK(*input++);
1323 if (Py_CHARMASK((*output++ = table[c])) != c)
1324 changed = 1;
1325 }
1326 if (changed)
1327 return result;
1328 Py_DECREF(result);
1329 Py_INCREF(input_obj);
1330 return input_obj;
1331 }
1332
1333 for (i = 0; i < 256; i++)
1334 trans_table[i] = Py_CHARMASK(table[i]);
1335
1336 for (i = 0; i < dellen; i++)
1337 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1338
1339 for (i = inlen; --i >= 0; ) {
1340 c = Py_CHARMASK(*input++);
1341 if (trans_table[c] != -1)
1342 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1343 continue;
1344 changed = 1;
1345 }
1346 if (!changed) {
1347 Py_DECREF(result);
1348 Py_INCREF(input_obj);
1349 return input_obj;
1350 }
1351 /* Fix the size of the resulting string */
1352 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1353 return NULL;
1354 return result;
1355}
1356
1357
1358/* What follows is used for implementing replace(). Perry Stoll. */
1359
/*
  mymemfind

  A strstr() work-alike for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  when there is none (which includes the case where PAT is longer than
  MEM).
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* a match cannot begin inside the final pat_len-1 bytes */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1385
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match.  For example mem=1111 with
  pat=11 gives 2, and mem=11111 with pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;
	int at;

	for (; len >= 0; hits++) {
		at = mymemfind(mem, len, pat, pat_len);
		if (at == -1)
			break;
		/* skip the unmatched prefix and the match itself */
		at += pat_len;
		mem += at;
		len -= at;
	}
	return hits;
}
1409
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "replace all".

   If STR is empty, PAT is longer than STR, or no replacement would be
   made, the original string is returned.  Otherwise a new buffer is
   allocated here with PyMem_MALLOC and returned; the caller must release
   it with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;
	new_len = len + nfound*(sub_len - pat_len);

	/* The result can be empty (every byte replaced by nothing).  A
	   zero-byte allocation request may return NULL, which the caller
	   would take for out of memory, so always ask for at least one
	   byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
1487
1488
1489static char replace__doc__[] =
1490"S.replace (old, new[, maxsplit]) -> string\n\
1491\n\
1492Return a copy of string S with all occurrences of substring\n\
1493old replaced by new. If the optional argument maxsplit is\n\
1494given, only the first maxsplit occurrences are replaced.";
1495
1496static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001497string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499 const char *str = PyString_AS_STRING(self), *sub, *repl;
1500 char *new_s;
1501 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1502 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001504 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001505
Guido van Rossum4c08d552000-03-10 22:55:18 +00001506 if (!PyArg_ParseTuple(args, "OO|i:replace",
1507 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001508 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001509
1510 if (PyString_Check(subobj)) {
1511 sub = PyString_AS_STRING(subobj);
1512 sub_len = PyString_GET_SIZE(subobj);
1513 }
1514 else if (PyUnicode_Check(subobj))
1515 return PyUnicode_Replace((PyObject *)self,
1516 subobj, replobj, count);
1517 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1518 return NULL;
1519
1520 if (PyString_Check(replobj)) {
1521 repl = PyString_AS_STRING(replobj);
1522 repl_len = PyString_GET_SIZE(replobj);
1523 }
1524 else if (PyUnicode_Check(replobj))
1525 return PyUnicode_Replace((PyObject *)self,
1526 subobj, replobj, count);
1527 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1528 return NULL;
1529
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001530 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001531 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532 return NULL;
1533 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001534 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001535 if (new_s == NULL) {
1536 PyErr_NoMemory();
1537 return NULL;
1538 }
1539 if (out_len == -1) {
1540 /* we're returning another reference to self */
1541 new = (PyObject*)self;
1542 Py_INCREF(new);
1543 }
1544 else {
1545 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001546 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547 }
1548 return new;
1549}
1550
1551
1552static char startswith__doc__[] =
1553"S.startswith(prefix[, start[, end]]) -> int\n\
1554\n\
1555Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1556optional start, test S beginning at that position. With optional end, stop\n\
1557comparing S at that position.";
1558
1559static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001560string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001562 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001563 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565 int plen;
1566 int start = 0;
1567 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001568 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569
Guido van Rossumc6821402000-05-08 14:08:05 +00001570 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1571 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 return NULL;
1573 if (PyString_Check(subobj)) {
1574 prefix = PyString_AS_STRING(subobj);
1575 plen = PyString_GET_SIZE(subobj);
1576 }
1577 else if (PyUnicode_Check(subobj))
1578 return PyInt_FromLong(
1579 PyUnicode_Tailmatch((PyObject *)self,
1580 subobj, start, end, -1));
1581 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582 return NULL;
1583
1584 /* adopt Java semantics for index out of range. it is legal for
1585 * offset to be == plen, but this only returns true if prefix is
1586 * the empty string.
1587 */
1588 if (start < 0 || start+plen > len)
1589 return PyInt_FromLong(0);
1590
1591 if (!memcmp(str+start, prefix, plen)) {
1592 /* did the match end after the specified end? */
1593 if (end < 0)
1594 return PyInt_FromLong(1);
1595 else if (end - start < plen)
1596 return PyInt_FromLong(0);
1597 else
1598 return PyInt_FromLong(1);
1599 }
1600 else return PyInt_FromLong(0);
1601}
1602
1603
1604static char endswith__doc__[] =
1605"S.endswith(suffix[, start[, end]]) -> int\n\
1606\n\
1607Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1608optional start, test S beginning at that position. With optional end, stop\n\
1609comparing S at that position.";
1610
1611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001612string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 const char* suffix;
1617 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618 int start = 0;
1619 int end = -1;
1620 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622
Guido van Rossumc6821402000-05-08 14:08:05 +00001623 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1624 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001625 return NULL;
1626 if (PyString_Check(subobj)) {
1627 suffix = PyString_AS_STRING(subobj);
1628 slen = PyString_GET_SIZE(subobj);
1629 }
1630 else if (PyUnicode_Check(subobj))
1631 return PyInt_FromLong(
1632 PyUnicode_Tailmatch((PyObject *)self,
1633 subobj, start, end, +1));
1634 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 return NULL;
1636
Guido van Rossum4c08d552000-03-10 22:55:18 +00001637 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 return PyInt_FromLong(0);
1639
1640 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001641 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642
Guido van Rossum4c08d552000-03-10 22:55:18 +00001643 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001644 return PyInt_FromLong(1);
1645 else return PyInt_FromLong(0);
1646}
1647
1648
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001649static char encode__doc__[] =
1650"S.encode([encoding[,errors]]) -> string\n\
1651\n\
1652Return an encoded string version of S. Default encoding is the current\n\
1653default string encoding. errors may be given to set a different error\n\
1654handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1655a ValueError. Other possible values are 'ignore' and 'replace'.";
1656
1657static PyObject *
1658string_encode(PyStringObject *self, PyObject *args)
1659{
1660 char *encoding = NULL;
1661 char *errors = NULL;
1662 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1663 return NULL;
1664 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1665}
1666
1667
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668static char expandtabs__doc__[] =
1669"S.expandtabs([tabsize]) -> string\n\
1670\n\
1671Return a copy of S where all tab characters are expanded using spaces.\n\
1672If tabsize is not given, a tab size of 8 characters is assumed.";
1673
1674static PyObject*
1675string_expandtabs(PyStringObject *self, PyObject *args)
1676{
1677 const char *e, *p;
1678 char *q;
1679 int i, j;
1680 PyObject *u;
1681 int tabsize = 8;
1682
1683 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1684 return NULL;
1685
1686 /* First pass: determine size of ouput string */
1687 i = j = 0;
1688 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1689 for (p = PyString_AS_STRING(self); p < e; p++)
1690 if (*p == '\t') {
1691 if (tabsize > 0)
1692 j += tabsize - (j % tabsize);
1693 }
1694 else {
1695 j++;
1696 if (*p == '\n' || *p == '\r') {
1697 i += j;
1698 j = 0;
1699 }
1700 }
1701
1702 /* Second pass: create output string and fill it */
1703 u = PyString_FromStringAndSize(NULL, i + j);
1704 if (!u)
1705 return NULL;
1706
1707 j = 0;
1708 q = PyString_AS_STRING(u);
1709
1710 for (p = PyString_AS_STRING(self); p < e; p++)
1711 if (*p == '\t') {
1712 if (tabsize > 0) {
1713 i = tabsize - (j % tabsize);
1714 j += i;
1715 while (i--)
1716 *q++ = ' ';
1717 }
1718 }
1719 else {
1720 j++;
1721 *q++ = *p;
1722 if (*p == '\n' || *p == '\r')
1723 j = 0;
1724 }
1725
1726 return u;
1727}
1728
1729static
1730PyObject *pad(PyStringObject *self,
1731 int left,
1732 int right,
1733 char fill)
1734{
1735 PyObject *u;
1736
1737 if (left < 0)
1738 left = 0;
1739 if (right < 0)
1740 right = 0;
1741
1742 if (left == 0 && right == 0) {
1743 Py_INCREF(self);
1744 return (PyObject *)self;
1745 }
1746
1747 u = PyString_FromStringAndSize(NULL,
1748 left + PyString_GET_SIZE(self) + right);
1749 if (u) {
1750 if (left)
1751 memset(PyString_AS_STRING(u), fill, left);
1752 memcpy(PyString_AS_STRING(u) + left,
1753 PyString_AS_STRING(self),
1754 PyString_GET_SIZE(self));
1755 if (right)
1756 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1757 fill, right);
1758 }
1759
1760 return u;
1761}
1762
1763static char ljust__doc__[] =
1764"S.ljust(width) -> string\n\
1765\n\
1766Return S left justified in a string of length width. Padding is\n\
1767done using spaces.";
1768
1769static PyObject *
1770string_ljust(PyStringObject *self, PyObject *args)
1771{
1772 int width;
1773 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1774 return NULL;
1775
1776 if (PyString_GET_SIZE(self) >= width) {
1777 Py_INCREF(self);
1778 return (PyObject*) self;
1779 }
1780
1781 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1782}
1783
1784
1785static char rjust__doc__[] =
1786"S.rjust(width) -> string\n\
1787\n\
1788Return S right justified in a string of length width. Padding is\n\
1789done using spaces.";
1790
1791static PyObject *
1792string_rjust(PyStringObject *self, PyObject *args)
1793{
1794 int width;
1795 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1796 return NULL;
1797
1798 if (PyString_GET_SIZE(self) >= width) {
1799 Py_INCREF(self);
1800 return (PyObject*) self;
1801 }
1802
1803 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1804}
1805
1806
1807static char center__doc__[] =
1808"S.center(width) -> string\n\
1809\n\
1810Return S centered in a string of length width. Padding is done\n\
1811using spaces.";
1812
1813static PyObject *
1814string_center(PyStringObject *self, PyObject *args)
1815{
1816 int marg, left;
1817 int width;
1818
1819 if (!PyArg_ParseTuple(args, "i:center", &width))
1820 return NULL;
1821
1822 if (PyString_GET_SIZE(self) >= width) {
1823 Py_INCREF(self);
1824 return (PyObject*) self;
1825 }
1826
1827 marg = width - PyString_GET_SIZE(self);
1828 left = marg / 2 + (marg & width & 1);
1829
1830 return pad(self, left, marg - left, ' ');
1831}
1832
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad with '0' up to `width`, keeping a leading
 * '+' or '-' in front of the inserted zeros.  Compiled out for now.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int zeros;
    char *buf;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject *)self;
    }

    zeros = width - PyString_GET_SIZE(self);

    result = pad(self, zeros, 0, '0');
    if (result == NULL)
        return NULL;

    /* buf[zeros] is the first character of the original string. */
    buf = PyString_AS_STRING(result);
    if (buf[zeros] == '+' || buf[zeros] == '-') {
        /* swap the sign with the first padding zero */
        buf[0] = buf[zeros];
        buf[zeros] = '0';
    }

    return result;
}
#endif
1872
1873static char isspace__doc__[] =
1874"S.isspace() -> int\n\
1875\n\
1876Return 1 if there are only whitespace characters in S,\n\
18770 otherwise.";
1878
1879static PyObject*
1880string_isspace(PyStringObject *self, PyObject *args)
1881{
Fred Drakeba096332000-07-09 07:04:36 +00001882 register const unsigned char *p
1883 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001884 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885
1886 if (!PyArg_NoArgs(args))
1887 return NULL;
1888
1889 /* Shortcut for single character strings */
1890 if (PyString_GET_SIZE(self) == 1 &&
1891 isspace(*p))
1892 return PyInt_FromLong(1);
1893
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001894 /* Special case for empty strings */
1895 if (PyString_GET_SIZE(self) == 0)
1896 return PyInt_FromLong(0);
1897
Guido van Rossum4c08d552000-03-10 22:55:18 +00001898 e = p + PyString_GET_SIZE(self);
1899 for (; p < e; p++) {
1900 if (!isspace(*p))
1901 return PyInt_FromLong(0);
1902 }
1903 return PyInt_FromLong(1);
1904}
1905
1906
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001907static char isalpha__doc__[] =
1908"S.isalpha() -> int\n\
1909\n\
1910Return 1 if all characters in S are alphabetic\n\
1911and there is at least one character in S, 0 otherwise.";
1912
1913static PyObject*
1914string_isalpha(PyUnicodeObject *self, PyObject *args)
1915{
Fred Drakeba096332000-07-09 07:04:36 +00001916 register const unsigned char *p
1917 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001918 register const unsigned char *e;
1919
1920 if (!PyArg_NoArgs(args))
1921 return NULL;
1922
1923 /* Shortcut for single character strings */
1924 if (PyString_GET_SIZE(self) == 1 &&
1925 isalpha(*p))
1926 return PyInt_FromLong(1);
1927
1928 /* Special case for empty strings */
1929 if (PyString_GET_SIZE(self) == 0)
1930 return PyInt_FromLong(0);
1931
1932 e = p + PyString_GET_SIZE(self);
1933 for (; p < e; p++) {
1934 if (!isalpha(*p))
1935 return PyInt_FromLong(0);
1936 }
1937 return PyInt_FromLong(1);
1938}
1939
1940
1941static char isalnum__doc__[] =
1942"S.isalnum() -> int\n\
1943\n\
1944Return 1 if all characters in S are alphanumeric\n\
1945and there is at least one character in S, 0 otherwise.";
1946
1947static PyObject*
1948string_isalnum(PyUnicodeObject *self, PyObject *args)
1949{
Fred Drakeba096332000-07-09 07:04:36 +00001950 register const unsigned char *p
1951 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001952 register const unsigned char *e;
1953
1954 if (!PyArg_NoArgs(args))
1955 return NULL;
1956
1957 /* Shortcut for single character strings */
1958 if (PyString_GET_SIZE(self) == 1 &&
1959 isalnum(*p))
1960 return PyInt_FromLong(1);
1961
1962 /* Special case for empty strings */
1963 if (PyString_GET_SIZE(self) == 0)
1964 return PyInt_FromLong(0);
1965
1966 e = p + PyString_GET_SIZE(self);
1967 for (; p < e; p++) {
1968 if (!isalnum(*p))
1969 return PyInt_FromLong(0);
1970 }
1971 return PyInt_FromLong(1);
1972}
1973
1974
Guido van Rossum4c08d552000-03-10 22:55:18 +00001975static char isdigit__doc__[] =
1976"S.isdigit() -> int\n\
1977\n\
1978Return 1 if there are only digit characters in S,\n\
19790 otherwise.";
1980
1981static PyObject*
1982string_isdigit(PyStringObject *self, PyObject *args)
1983{
Fred Drakeba096332000-07-09 07:04:36 +00001984 register const unsigned char *p
1985 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001986 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001987
1988 if (!PyArg_NoArgs(args))
1989 return NULL;
1990
1991 /* Shortcut for single character strings */
1992 if (PyString_GET_SIZE(self) == 1 &&
1993 isdigit(*p))
1994 return PyInt_FromLong(1);
1995
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001996 /* Special case for empty strings */
1997 if (PyString_GET_SIZE(self) == 0)
1998 return PyInt_FromLong(0);
1999
Guido van Rossum4c08d552000-03-10 22:55:18 +00002000 e = p + PyString_GET_SIZE(self);
2001 for (; p < e; p++) {
2002 if (!isdigit(*p))
2003 return PyInt_FromLong(0);
2004 }
2005 return PyInt_FromLong(1);
2006}
2007
2008
2009static char islower__doc__[] =
2010"S.islower() -> int\n\
2011\n\
2012Return 1 if all cased characters in S are lowercase and there is\n\
2013at least one cased character in S, 0 otherwise.";
2014
2015static PyObject*
2016string_islower(PyStringObject *self, PyObject *args)
2017{
Fred Drakeba096332000-07-09 07:04:36 +00002018 register const unsigned char *p
2019 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002020 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002021 int cased;
2022
2023 if (!PyArg_NoArgs(args))
2024 return NULL;
2025
2026 /* Shortcut for single character strings */
2027 if (PyString_GET_SIZE(self) == 1)
2028 return PyInt_FromLong(islower(*p) != 0);
2029
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002030 /* Special case for empty strings */
2031 if (PyString_GET_SIZE(self) == 0)
2032 return PyInt_FromLong(0);
2033
Guido van Rossum4c08d552000-03-10 22:55:18 +00002034 e = p + PyString_GET_SIZE(self);
2035 cased = 0;
2036 for (; p < e; p++) {
2037 if (isupper(*p))
2038 return PyInt_FromLong(0);
2039 else if (!cased && islower(*p))
2040 cased = 1;
2041 }
2042 return PyInt_FromLong(cased);
2043}
2044
2045
2046static char isupper__doc__[] =
2047"S.isupper() -> int\n\
2048\n\
2049Return 1 if all cased characters in S are uppercase and there is\n\
2050at least one cased character in S, 0 otherwise.";
2051
2052static PyObject*
2053string_isupper(PyStringObject *self, PyObject *args)
2054{
Fred Drakeba096332000-07-09 07:04:36 +00002055 register const unsigned char *p
2056 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002057 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002058 int cased;
2059
2060 if (!PyArg_NoArgs(args))
2061 return NULL;
2062
2063 /* Shortcut for single character strings */
2064 if (PyString_GET_SIZE(self) == 1)
2065 return PyInt_FromLong(isupper(*p) != 0);
2066
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002067 /* Special case for empty strings */
2068 if (PyString_GET_SIZE(self) == 0)
2069 return PyInt_FromLong(0);
2070
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 e = p + PyString_GET_SIZE(self);
2072 cased = 0;
2073 for (; p < e; p++) {
2074 if (islower(*p))
2075 return PyInt_FromLong(0);
2076 else if (!cased && isupper(*p))
2077 cased = 1;
2078 }
2079 return PyInt_FromLong(cased);
2080}
2081
2082
2083static char istitle__doc__[] =
2084"S.istitle() -> int\n\
2085\n\
2086Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2087may only follow uncased characters and lowercase characters only cased\n\
2088ones. Return 0 otherwise.";
2089
2090static PyObject*
2091string_istitle(PyStringObject *self, PyObject *args)
2092{
Fred Drakeba096332000-07-09 07:04:36 +00002093 register const unsigned char *p
2094 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002095 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096 int cased, previous_is_cased;
2097
2098 if (!PyArg_NoArgs(args))
2099 return NULL;
2100
2101 /* Shortcut for single character strings */
2102 if (PyString_GET_SIZE(self) == 1)
2103 return PyInt_FromLong(isupper(*p) != 0);
2104
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002105 /* Special case for empty strings */
2106 if (PyString_GET_SIZE(self) == 0)
2107 return PyInt_FromLong(0);
2108
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 e = p + PyString_GET_SIZE(self);
2110 cased = 0;
2111 previous_is_cased = 0;
2112 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002113 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002114
2115 if (isupper(ch)) {
2116 if (previous_is_cased)
2117 return PyInt_FromLong(0);
2118 previous_is_cased = 1;
2119 cased = 1;
2120 }
2121 else if (islower(ch)) {
2122 if (!previous_is_cased)
2123 return PyInt_FromLong(0);
2124 previous_is_cased = 1;
2125 cased = 1;
2126 }
2127 else
2128 previous_is_cased = 0;
2129 }
2130 return PyInt_FromLong(cased);
2131}
2132
2133
2134static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002135"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002136\n\
2137Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002138Line breaks are not included in the resulting list unless keepends\n\
2139is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002140
2141#define SPLIT_APPEND(data, left, right) \
2142 str = PyString_FromStringAndSize(data + left, right - left); \
2143 if (!str) \
2144 goto onError; \
2145 if (PyList_Append(list, str)) { \
2146 Py_DECREF(str); \
2147 goto onError; \
2148 } \
2149 else \
2150 Py_DECREF(str);
2151
2152static PyObject*
2153string_splitlines(PyStringObject *self, PyObject *args)
2154{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002155 register int i;
2156 register int j;
2157 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002158 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002159 PyObject *list;
2160 PyObject *str;
2161 char *data;
2162
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002163 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002164 return NULL;
2165
2166 data = PyString_AS_STRING(self);
2167 len = PyString_GET_SIZE(self);
2168
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169 list = PyList_New(0);
2170 if (!list)
2171 goto onError;
2172
2173 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002174 int eol;
2175
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 /* Find a line and append it */
2177 while (i < len && data[i] != '\n' && data[i] != '\r')
2178 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179
2180 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002181 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002182 if (i < len) {
2183 if (data[i] == '\r' && i + 1 < len &&
2184 data[i+1] == '\n')
2185 i += 2;
2186 else
2187 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002188 if (keepends)
2189 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002191 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002192 j = i;
2193 }
2194 if (j < len) {
2195 SPLIT_APPEND(data, j, len);
2196 }
2197
2198 return list;
2199
2200 onError:
2201 Py_DECREF(list);
2202 return NULL;
2203}
2204
2205#undef SPLIT_APPEND
2206
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207
2208static PyMethodDef
2209string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210 /* Counterparts of the obsolete stropmodule functions; except
2211 string.maketrans(). */
2212 {"join", (PyCFunction)string_join, 1, join__doc__},
2213 {"split", (PyCFunction)string_split, 1, split__doc__},
2214 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2215 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2216 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2217 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2218 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2219 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2220 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002221 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2222 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2224 {"count", (PyCFunction)string_count, 1, count__doc__},
2225 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2226 {"find", (PyCFunction)string_find, 1, find__doc__},
2227 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2230 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2231 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2232 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2234 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2235 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2237 {"title", (PyCFunction)string_title, 1, title__doc__},
2238 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2239 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2240 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002241 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2243 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2244#if 0
2245 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2246#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 {NULL, NULL} /* sentinel */
2248};
2249
2250static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002251string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252{
2253 return Py_FindMethod(string_methods, (PyObject*)s, name);
2254}
2255
2256
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002257PyTypeObject PyString_Type = {
2258 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002259 0,
2260 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002261 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002262 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002263 (destructor)string_dealloc, /*tp_dealloc*/
2264 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002266 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002267 (cmpfunc)string_compare, /*tp_compare*/
2268 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002269 0, /*tp_as_number*/
2270 &string_as_sequence, /*tp_as_sequence*/
2271 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002272 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002273 0, /*tp_call*/
2274 0, /*tp_str*/
2275 0, /*tp_getattro*/
2276 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002277 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002278 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002279 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002280};
2281
2282void
Fred Drakeba096332000-07-09 07:04:36 +00002283PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002284{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002285 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002286 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002287 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002288 if (w == NULL || !PyString_Check(*pv)) {
2289 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002290 *pv = NULL;
2291 return;
2292 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002293 v = string_concat((PyStringObject *) *pv, w);
2294 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002295 *pv = v;
2296}
2297
Guido van Rossum013142a1994-08-30 08:19:36 +00002298void
Fred Drakeba096332000-07-09 07:04:36 +00002299PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002300{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002301 PyString_Concat(pv, w);
2302 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002303}
2304
2305
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002306/* The following function breaks the notion that strings are immutable:
2307 it changes the size of a string. We get away with this only if there
2308 is only one module referencing the object. You can also think of it
2309 as creating a new string object and destroying the old one, only
2310 more efficiently. In any case, don't use this if the string may
2311 already be known to some other part of the code... */
2312
2313int
Fred Drakeba096332000-07-09 07:04:36 +00002314_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002315{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002316 register PyObject *v;
2317 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002318 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002319 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002320 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002321 Py_DECREF(v);
2322 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002323 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002324 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002325 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002326#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002327 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002328#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002329 _Py_ForgetReference(v);
2330 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002331 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002332 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002333 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002334 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002335 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002336 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002337 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002338 _Py_NewReference(*pv);
2339 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002340 sv->ob_size = newsize;
2341 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002342 return 0;
2343}
Guido van Rossume5372401993-03-16 12:15:04 +00002344
2345/* Helpers for formatstring */
2346
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002348getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002349{
2350 int argidx = *p_argidx;
2351 if (argidx < arglen) {
2352 (*p_argidx)++;
2353 if (arglen < 0)
2354 return args;
2355 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002356 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002357 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002358 PyErr_SetString(PyExc_TypeError,
2359 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002360 return NULL;
2361}
2362
/* Bit flags describing a format specifier; each one occupies its own
   bit so they can be combined in a single int. */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08    /* makes formatfloat/formatint emit '#' */
#define F_ZERO  0x10
2368
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002369static int
Fred Drakeba096332000-07-09 07:04:36 +00002370formatfloat(char *buf, size_t buflen, int flags,
2371 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002372{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002373 /* fmt = '%#.' + `prec` + `type`
2374 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002375 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002376 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002377 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002378 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002379 if (prec < 0)
2380 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002381 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2382 type = 'g';
2383 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002384 /* worst case length calc to ensure no buffer overrun:
2385 fmt = %#.<prec>g
2386 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2387 for any double rep.)
2388 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2389 If prec=0 the effective precision is 1 (the leading digit is
2390 always given), therefore increase by one to 10+prec. */
2391 if (buflen <= (size_t)10 + (size_t)prec) {
2392 PyErr_SetString(PyExc_OverflowError,
2393 "formatted float is too long (precision too long?)");
2394 return -1;
2395 }
Guido van Rossume5372401993-03-16 12:15:04 +00002396 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002397 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002398}
2399
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002400static int
Fred Drakeba096332000-07-09 07:04:36 +00002401formatint(char *buf, size_t buflen, int flags,
2402 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002403{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002404 /* fmt = '%#.' + `prec` + 'l' + `type`
2405 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002406 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002407 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002408 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002409 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002410 if (prec < 0)
2411 prec = 1;
2412 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002413 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2414 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2415 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2416 PyErr_SetString(PyExc_OverflowError,
2417 "formatted integer is too long (precision too long?)");
2418 return -1;
2419 }
Guido van Rossume5372401993-03-16 12:15:04 +00002420 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002421 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002422}
2423
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002424static int
Fred Drakeba096332000-07-09 07:04:36 +00002425formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002426{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002427 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002428 if (PyString_Check(v)) {
2429 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002430 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002431 }
2432 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002433 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002434 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002435 }
2436 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002437 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002438}
2439
Guido van Rossum013142a1994-08-30 08:19:36 +00002440
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesised so the cast stays attached to
   the constant wherever the macro is used in an expression.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002450
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002451PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002452PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002453{
2454 char *fmt, *res;
2455 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002456 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002457 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002458 PyObject *dict = NULL;
2459 if (format == NULL || !PyString_Check(format) || args == NULL) {
2460 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002461 return NULL;
2462 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002463 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002464 fmt = PyString_AsString(format);
2465 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002466 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002467 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002468 if (result == NULL)
2469 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002470 res = PyString_AsString(result);
2471 if (PyTuple_Check(args)) {
2472 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002473 argidx = 0;
2474 }
2475 else {
2476 arglen = -1;
2477 argidx = -2;
2478 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002479 if (args->ob_type->tp_as_mapping)
2480 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002481 while (--fmtcnt >= 0) {
2482 if (*fmt != '%') {
2483 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002484 rescnt = fmtcnt + 100;
2485 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002486 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002487 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002488 res = PyString_AsString(result)
2489 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002490 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002491 }
2492 *res++ = *fmt++;
2493 }
2494 else {
2495 /* Got a format specifier */
2496 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002497 int width = -1;
2498 int prec = -1;
2499 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002500 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002501 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002502 PyObject *v = NULL;
2503 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002504 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002505 int sign;
2506 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002507 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002508 char *fmt_start = fmt;
2509
Guido van Rossumda9c2711996-12-05 21:58:58 +00002510 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002511 if (*fmt == '(') {
2512 char *keystart;
2513 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002514 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002515 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002516
2517 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002518 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002519 "format requires a mapping");
2520 goto error;
2521 }
2522 ++fmt;
2523 --fmtcnt;
2524 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002525 /* Skip over balanced parentheses */
2526 while (pcount > 0 && --fmtcnt >= 0) {
2527 if (*fmt == ')')
2528 --pcount;
2529 else if (*fmt == '(')
2530 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002531 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002532 }
2533 keylen = fmt - keystart - 1;
2534 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002535 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002536 "incomplete format key");
2537 goto error;
2538 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002539 key = PyString_FromStringAndSize(keystart,
2540 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002541 if (key == NULL)
2542 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002543 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002544 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002545 args_owned = 0;
2546 }
2547 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002548 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002549 if (args == NULL) {
2550 goto error;
2551 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002552 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002553 arglen = -1;
2554 argidx = -2;
2555 }
Guido van Rossume5372401993-03-16 12:15:04 +00002556 while (--fmtcnt >= 0) {
2557 switch (c = *fmt++) {
2558 case '-': flags |= F_LJUST; continue;
2559 case '+': flags |= F_SIGN; continue;
2560 case ' ': flags |= F_BLANK; continue;
2561 case '#': flags |= F_ALT; continue;
2562 case '0': flags |= F_ZERO; continue;
2563 }
2564 break;
2565 }
2566 if (c == '*') {
2567 v = getnextarg(args, arglen, &argidx);
2568 if (v == NULL)
2569 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002570 if (!PyInt_Check(v)) {
2571 PyErr_SetString(PyExc_TypeError,
2572 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002573 goto error;
2574 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002575 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002576 if (width < 0) {
2577 flags |= F_LJUST;
2578 width = -width;
2579 }
Guido van Rossume5372401993-03-16 12:15:04 +00002580 if (--fmtcnt >= 0)
2581 c = *fmt++;
2582 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002583 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002584 width = c - '0';
2585 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002586 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002587 if (!isdigit(c))
2588 break;
2589 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002590 PyErr_SetString(
2591 PyExc_ValueError,
2592 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002593 goto error;
2594 }
2595 width = width*10 + (c - '0');
2596 }
2597 }
2598 if (c == '.') {
2599 prec = 0;
2600 if (--fmtcnt >= 0)
2601 c = *fmt++;
2602 if (c == '*') {
2603 v = getnextarg(args, arglen, &argidx);
2604 if (v == NULL)
2605 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002606 if (!PyInt_Check(v)) {
2607 PyErr_SetString(
2608 PyExc_TypeError,
2609 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002610 goto error;
2611 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002612 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002613 if (prec < 0)
2614 prec = 0;
2615 if (--fmtcnt >= 0)
2616 c = *fmt++;
2617 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002618 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002619 prec = c - '0';
2620 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002621 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002622 if (!isdigit(c))
2623 break;
2624 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002625 PyErr_SetString(
2626 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002627 "prec too big");
2628 goto error;
2629 }
2630 prec = prec*10 + (c - '0');
2631 }
2632 }
2633 } /* prec */
2634 if (fmtcnt >= 0) {
2635 if (c == 'h' || c == 'l' || c == 'L') {
2636 size = c;
2637 if (--fmtcnt >= 0)
2638 c = *fmt++;
2639 }
2640 }
2641 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002642 PyErr_SetString(PyExc_ValueError,
2643 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002644 goto error;
2645 }
2646 if (c != '%') {
2647 v = getnextarg(args, arglen, &argidx);
2648 if (v == NULL)
2649 goto error;
2650 }
2651 sign = 0;
2652 fill = ' ';
2653 switch (c) {
2654 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002655 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002656 len = 1;
2657 break;
2658 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002659 case 'r':
2660 if (PyUnicode_Check(v)) {
2661 fmt = fmt_start;
2662 goto unicode;
2663 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002664 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002665 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002666 else
2667 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002668 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002669 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002670 if (!PyString_Check(temp)) {
2671 PyErr_SetString(PyExc_TypeError,
2672 "%s argument has non-string str()");
2673 goto error;
2674 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002675 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002676 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002677 if (prec >= 0 && len > prec)
2678 len = prec;
2679 break;
2680 case 'i':
2681 case 'd':
2682 case 'u':
2683 case 'o':
2684 case 'x':
2685 case 'X':
2686 if (c == 'i')
2687 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002688 pbuf = formatbuf;
2689 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002690 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002691 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002692 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002693 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002694 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002695 if ((flags&F_ALT) &&
2696 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002697 pbuf[0] == '0' && pbuf[1] == c) {
2698 *res++ = *pbuf++;
2699 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002700 rescnt -= 2;
2701 len -= 2;
2702 width -= 2;
2703 if (width < 0)
2704 width = 0;
2705 }
2706 }
Guido van Rossume5372401993-03-16 12:15:04 +00002707 break;
2708 case 'e':
2709 case 'E':
2710 case 'f':
2711 case 'g':
2712 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002713 pbuf = formatbuf;
2714 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002715 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002716 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002717 sign = 1;
2718 if (flags&F_ZERO)
2719 fill = '0';
2720 break;
2721 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002722 pbuf = formatbuf;
2723 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002724 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002725 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002726 break;
2727 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002728 PyErr_Format(PyExc_ValueError,
2729 "unsupported format character '%c' (0x%x)",
2730 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002731 goto error;
2732 }
2733 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002734 if (*pbuf == '-' || *pbuf == '+') {
2735 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002736 len--;
2737 }
2738 else if (flags & F_SIGN)
2739 sign = '+';
2740 else if (flags & F_BLANK)
2741 sign = ' ';
2742 else
2743 sign = '\0';
2744 }
2745 if (width < len)
2746 width = len;
2747 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002748 reslen -= rescnt;
2749 rescnt = width + fmtcnt + 100;
2750 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002751 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002752 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002753 res = PyString_AsString(result)
2754 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002755 }
2756 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002757 if (fill != ' ')
2758 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002759 rescnt--;
2760 if (width > len)
2761 width--;
2762 }
2763 if (width > len && !(flags&F_LJUST)) {
2764 do {
2765 --rescnt;
2766 *res++ = fill;
2767 } while (--width > len);
2768 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002769 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002770 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002771 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002772 res += len;
2773 rescnt -= len;
2774 while (--width >= len) {
2775 --rescnt;
2776 *res++ = ' ';
2777 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002778 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002779 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002780 "not all arguments converted");
2781 goto error;
2782 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002783 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002784 } /* '%' */
2785 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002786 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002787 PyErr_SetString(PyExc_TypeError,
2788 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002789 goto error;
2790 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002791 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002792 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002793 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002794 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002795 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002796
2797 unicode:
2798 if (args_owned) {
2799 Py_DECREF(args);
2800 args_owned = 0;
2801 }
2802 /* Fiddle args right (remove the first argidx-1 arguments) */
2803 --argidx;
2804 if (PyTuple_Check(orig_args) && argidx > 0) {
2805 PyObject *v;
2806 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2807 v = PyTuple_New(n);
2808 if (v == NULL)
2809 goto error;
2810 while (--n >= 0) {
2811 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2812 Py_INCREF(w);
2813 PyTuple_SET_ITEM(v, n, w);
2814 }
2815 args = v;
2816 } else {
2817 Py_INCREF(orig_args);
2818 args = orig_args;
2819 }
2820 /* Paste rest of format string to what we have of the result
2821 string; we reuse result for this */
2822 rescnt = res - PyString_AS_STRING(result);
2823 fmtcnt = PyString_GET_SIZE(format) - \
2824 (fmt - PyString_AS_STRING(format));
2825 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2826 Py_DECREF(args);
2827 goto error;
2828 }
2829 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2830 format = result;
2831 /* Let Unicode do its magic */
2832 result = PyUnicode_Format(format, args);
2833 Py_DECREF(format);
2834 Py_DECREF(args);
2835 return result;
2836
Guido van Rossume5372401993-03-16 12:15:04 +00002837 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002838 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002839 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002840 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002841 }
Guido van Rossume5372401993-03-16 12:15:04 +00002842 return NULL;
2843}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002844
2845
2846#ifdef INTERN_STRINGS
2847
2848static PyObject *interned;
2849
2850void
Fred Drakeba096332000-07-09 07:04:36 +00002851PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002852{
2853 register PyStringObject *s = (PyStringObject *)(*p);
2854 PyObject *t;
2855 if (s == NULL || !PyString_Check(s))
2856 Py_FatalError("PyString_InternInPlace: strings only please!");
2857 if ((t = s->ob_sinterned) != NULL) {
2858 if (t == (PyObject *)s)
2859 return;
2860 Py_INCREF(t);
2861 *p = t;
2862 Py_DECREF(s);
2863 return;
2864 }
2865 if (interned == NULL) {
2866 interned = PyDict_New();
2867 if (interned == NULL)
2868 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002869 }
2870 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2871 Py_INCREF(t);
2872 *p = s->ob_sinterned = t;
2873 Py_DECREF(s);
2874 return;
2875 }
2876 t = (PyObject *)s;
2877 if (PyDict_SetItem(interned, t, t) == 0) {
2878 s->ob_sinterned = t;
2879 return;
2880 }
2881 PyErr_Clear();
2882}
2883
2884
2885PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002886PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002887{
2888 PyObject *s = PyString_FromString(cp);
2889 if (s == NULL)
2890 return NULL;
2891 PyString_InternInPlace(&s);
2892 return s;
2893}
2894
2895#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002896
2897void
Fred Drakeba096332000-07-09 07:04:36 +00002898PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00002899{
2900 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002901 for (i = 0; i < UCHAR_MAX + 1; i++) {
2902 Py_XDECREF(characters[i]);
2903 characters[i] = NULL;
2904 }
2905#ifndef DONT_SHARE_SHORT_STRINGS
2906 Py_XDECREF(nullstring);
2907 nullstring = NULL;
2908#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002909#ifdef INTERN_STRINGS
2910 if (interned) {
2911 int pos, changed;
2912 PyObject *key, *value;
2913 do {
2914 changed = 0;
2915 pos = 0;
2916 while (PyDict_Next(interned, &pos, &key, &value)) {
2917 if (key->ob_refcnt == 2 && key == value) {
2918 PyDict_DelItem(interned, key);
2919 changed = 1;
2920 }
2921 }
2922 } while (changed);
2923 }
2924#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002925}