blob: dc41122c8a81dcfa9d4c8af381e5467cc9d49c1b [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* String object implementation */
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000014
Guido van Rossum71160aa1997-06-03 18:03:18 +000015#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000016#include <ctype.h>
17
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many times a request was satisfied by the
   shared empty string (null_strings) or by a cached one-character
   string (one_strings) instead of a fresh allocation. */
int null_strings;
int one_strings;
#endif
21
Guido van Rossum03093a21994-09-28 15:51:32 +000022#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000023#include <limits.h>
24#else
25#ifndef UCHAR_MAX
26#define UCHAR_MAX 255
27#endif
28#endif
29
Guido van Rossumc0b618a1997-05-02 03:12:38 +000030static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000031#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000032static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000033#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000034
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000050PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000051PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000052{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000071#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
73 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000074 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000075 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef CACHE_HASH
80 op->ob_shash = -1;
81#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000082#ifdef INTERN_STRINGS
83 op->ob_sinterned = NULL;
84#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 if (str != NULL)
86 memcpy(op->ob_sval, str, size);
87 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 if (size == 0) {
90 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
93 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000103 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000105 if (size > INT_MAX) {
106 PyErr_SetString(PyExc_OverflowError,
107 "string is too long for a Python string");
108 return NULL;
109 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000110#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000111 if (size == 0 && (op = nullstring) != NULL) {
112#ifdef COUNT_ALLOCS
113 null_strings++;
114#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000115 Py_INCREF(op);
116 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 }
118 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
119#ifdef COUNT_ALLOCS
120 one_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000125#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
127 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133#ifdef CACHE_HASH
134 op->ob_shash = -1;
135#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000136#ifdef INTERN_STRINGS
137 op->ob_sinterned = NULL;
138#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000140#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000148#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150}
151
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000152PyObject *PyString_Decode(const char *s,
153 int size,
154 const char *encoding,
155 const char *errors)
156{
157 PyObject *buffer = NULL, *str;
158
159 if (encoding == NULL)
160 encoding = PyUnicode_GetDefaultEncoding();
161
162 /* Decode via the codec registry */
163 buffer = PyBuffer_FromMemory((void *)s, size);
164 if (buffer == NULL)
165 goto onError;
166 str = PyCodec_Decode(buffer, encoding, errors);
167 if (str == NULL)
168 goto onError;
169 /* Convert Unicode to a string using the default encoding */
170 if (PyUnicode_Check(str)) {
171 PyObject *temp = str;
172 str = PyUnicode_AsEncodedString(str, NULL, NULL);
173 Py_DECREF(temp);
174 if (str == NULL)
175 goto onError;
176 }
177 if (!PyString_Check(str)) {
178 PyErr_Format(PyExc_TypeError,
179 "decoder did not return an string object (type=%.400s)",
180 str->ob_type->tp_name);
181 Py_DECREF(str);
182 goto onError;
183 }
184 Py_DECREF(buffer);
185 return str;
186
187 onError:
188 Py_XDECREF(buffer);
189 return NULL;
190}
191
192PyObject *PyString_Encode(const char *s,
193 int size,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v, *str;
198
199 str = PyString_FromStringAndSize(s, size);
200 if (str == NULL)
201 return NULL;
202 v = PyString_AsEncodedString(str, encoding, errors);
203 Py_DECREF(str);
204 return v;
205}
206
207PyObject *PyString_AsEncodedString(PyObject *str,
208 const char *encoding,
209 const char *errors)
210{
211 PyObject *v;
212
213 if (!PyString_Check(str)) {
214 PyErr_BadArgument();
215 goto onError;
216 }
217
218 if (encoding == NULL)
219 encoding = PyUnicode_GetDefaultEncoding();
220
221 /* Encode via the codec registry */
222 v = PyCodec_Encode(str, encoding, errors);
223 if (v == NULL)
224 goto onError;
225 /* Convert Unicode to a string using the default encoding */
226 if (PyUnicode_Check(v)) {
227 PyObject *temp = v;
228 v = PyUnicode_AsEncodedString(v, NULL, NULL);
229 Py_DECREF(temp);
230 if (v == NULL)
231 goto onError;
232 }
233 if (!PyString_Check(v)) {
234 PyErr_Format(PyExc_TypeError,
235 "encoder did not return a string object (type=%.400s)",
236 v->ob_type->tp_name);
237 Py_DECREF(v);
238 goto onError;
239 }
240 return v;
241
242 onError:
243 return NULL;
244}
245
Guido van Rossum234f9421993-06-17 12:35:49 +0000246static void
Fred Drakeba096332000-07-09 07:04:36 +0000247string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000248{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000249 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000250}
251
Guido van Rossumd7047b31995-01-02 19:07:15 +0000252int
Fred Drakeba096332000-07-09 07:04:36 +0000253PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000255 if (!PyString_Check(op)) {
256 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000257 return -1;
258 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000259 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000263PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000265 if (!PyString_Check(op)) {
266 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 return NULL;
268 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000269 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000270}
271
272/* Methods */
273
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000274static int
Fred Drakeba096332000-07-09 07:04:36 +0000275string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276{
277 int i;
278 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000279 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000280 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000283 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000285
286 /* figure out which quote to use; single is prefered */
287 quote = '\'';
288 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
289 quote = '"';
290
291 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292 for (i = 0; i < op->ob_size; i++) {
293 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000294 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000295 fprintf(fp, "\\%c", c);
296 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000297 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000299 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000301 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000302 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303}
304
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000305static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000306string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000308 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
309 PyObject *v;
310 if (newsize > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "string is too large to make repr");
313 }
314 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000317 }
318 else {
319 register int i;
320 register char c;
321 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000322 int quote;
323
324 /* figure out which quote to use; single is prefered */
325 quote = '\'';
326 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
327 quote = '"';
328
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000329 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000330 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000331 for (i = 0; i < op->ob_size; i++) {
332 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 *p++ = '\\', *p++ = c;
335 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 while (*p != '\0')
338 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339 }
340 else
341 *p++ = c;
342 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 _PyString_Resize(
346 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000347 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349}
350
351static int
Fred Drakeba096332000-07-09 07:04:36 +0000352string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000353{
354 return a->ob_size;
355}
356
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000357static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000358string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359{
360 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000361 register PyStringObject *op;
362 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000363 if (PyUnicode_Check(bb))
364 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000365 PyErr_Format(PyExc_TypeError,
366 "cannot add type \"%.200s\" to string",
367 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 return NULL;
369 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000370#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371 /* Optimize cases with empty left or right operand */
372 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000373 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000374 return bb;
375 }
376 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 Py_INCREF(a);
378 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 }
380 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000381 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000382 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000383 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000384 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000385 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000386 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000387#ifdef CACHE_HASH
388 op->ob_shash = -1;
389#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000390#ifdef INTERN_STRINGS
391 op->ob_sinterned = NULL;
392#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000393 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
394 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
395 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000396 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397#undef b
398}
399
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000400static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000401string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402{
403 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000404 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000405 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406 if (n < 0)
407 n = 0;
408 size = a->ob_size * n;
409 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000410 Py_INCREF(a);
411 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000413 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000414 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000415 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000416 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000418 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000419#ifdef CACHE_HASH
420 op->ob_shash = -1;
421#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000422#ifdef INTERN_STRINGS
423 op->ob_sinterned = NULL;
424#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000425 for (i = 0; i < size; i += a->ob_size)
426 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
427 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000429}
430
431/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
432
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000434string_slice(register PyStringObject *a, register int i, register int j)
435 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436{
437 if (i < 0)
438 i = 0;
439 if (j < 0)
440 j = 0; /* Avoid signed/unsigned bug in next line */
441 if (j > a->ob_size)
442 j = a->ob_size;
443 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000444 Py_INCREF(a);
445 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000446 }
447 if (j < i)
448 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000449 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450}
451
Guido van Rossum9284a572000-03-07 15:53:43 +0000452static int
Fred Drakeba096332000-07-09 07:04:36 +0000453string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000454{
455 register char *s, *end;
456 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000457 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000458 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000459 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000460 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000461 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000462 return -1;
463 }
464 c = PyString_AsString(el)[0];
465 s = PyString_AsString(a);
466 end = s + PyString_Size(a);
467 while (s < end) {
468 if (c == *s++)
469 return 1;
470 }
471 return 0;
472}
473
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000475string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000476{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000477 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000478 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000479 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000480 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 return NULL;
482 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000483 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000484 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000485#ifdef COUNT_ALLOCS
486 if (v != NULL)
487 one_strings++;
488#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000489 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000491 if (v == NULL)
492 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000493 characters[c] = (PyStringObject *) v;
494 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000495 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000496 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000497 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000498}
499
500static int
Fred Drakeba096332000-07-09 07:04:36 +0000501string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000502{
Guido van Rossum253919f1991-02-13 23:18:39 +0000503 int len_a = a->ob_size, len_b = b->ob_size;
504 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000505 int cmp;
506 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000507 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000508 if (cmp == 0)
509 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
510 if (cmp != 0)
511 return cmp;
512 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000513 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
Guido van Rossum9bfef441993-03-29 10:43:31 +0000516static long
Fred Drakeba096332000-07-09 07:04:36 +0000517string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000518{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000519 register int len;
520 register unsigned char *p;
521 register long x;
522
523#ifdef CACHE_HASH
524 if (a->ob_shash != -1)
525 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000526#ifdef INTERN_STRINGS
527 if (a->ob_sinterned != NULL)
528 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000529 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000530#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000531#endif
532 len = a->ob_size;
533 p = (unsigned char *) a->ob_sval;
534 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000535 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000536 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000537 x ^= a->ob_size;
538 if (x == -1)
539 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000540#ifdef CACHE_HASH
541 a->ob_shash = x;
542#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000543 return x;
544}
545
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000546static int
Fred Drakeba096332000-07-09 07:04:36 +0000547string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000548{
549 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000550 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000551 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000552 return -1;
553 }
554 *ptr = (void *)self->ob_sval;
555 return self->ob_size;
556}
557
558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000560{
Guido van Rossum045e6881997-09-08 18:30:11 +0000561 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000562 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000563 return -1;
564}
565
566static int
Fred Drakeba096332000-07-09 07:04:36 +0000567string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000568{
569 if ( lenp )
570 *lenp = self->ob_size;
571 return 1;
572}
573
Guido van Rossum1db70701998-10-08 02:18:52 +0000574static int
Fred Drakeba096332000-07-09 07:04:36 +0000575string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000576{
577 if ( index != 0 ) {
578 PyErr_SetString(PyExc_SystemError,
579 "accessing non-existent string segment");
580 return -1;
581 }
582 *ptr = self->ob_sval;
583 return self->ob_size;
584}
585
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000587 (inquiry)string_length, /*sq_length*/
588 (binaryfunc)string_concat, /*sq_concat*/
589 (intargfunc)string_repeat, /*sq_repeat*/
590 (intargfunc)string_item, /*sq_item*/
591 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000592 0, /*sq_ass_item*/
593 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000594 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000595};
596
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000597static PyBufferProcs string_as_buffer = {
598 (getreadbufferproc)string_buffer_getreadbuf,
599 (getwritebufferproc)string_buffer_getwritebuf,
600 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000601 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000602};
603
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000604
605
/* NOTE(review): presumably mode selectors (left end / right end / both
   ends) for strip routines defined later in this file -- confirm
   against their users. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2
609
610
611static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000612split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000613{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000614 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000615 PyObject* item;
616 PyObject *list = PyList_New(0);
617
618 if (list == NULL)
619 return NULL;
620
Guido van Rossum4c08d552000-03-10 22:55:18 +0000621 for (i = j = 0; i < len; ) {
622 while (i < len && isspace(Py_CHARMASK(s[i])))
623 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000624 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 while (i < len && !isspace(Py_CHARMASK(s[i])))
626 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000627 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000628 if (maxsplit-- <= 0)
629 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 item = PyString_FromStringAndSize(s+j, (int)(i-j));
631 if (item == NULL)
632 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000633 err = PyList_Append(list, item);
634 Py_DECREF(item);
635 if (err < 0)
636 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000637 while (i < len && isspace(Py_CHARMASK(s[i])))
638 i++;
639 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000640 }
641 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000642 if (j < len) {
643 item = PyString_FromStringAndSize(s+j, (int)(len - j));
644 if (item == NULL)
645 goto finally;
646 err = PyList_Append(list, item);
647 Py_DECREF(item);
648 if (err < 0)
649 goto finally;
650 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000651 return list;
652 finally:
653 Py_DECREF(list);
654 return NULL;
655}
656
657
/* Docstring for the split() string method. */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665
666static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000667string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000668{
669 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000670 int maxsplit = -1;
671 const char *s = PyString_AS_STRING(self), *sub;
672 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000673
Guido van Rossum4c08d552000-03-10 22:55:18 +0000674 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000675 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000676 if (maxsplit < 0)
677 maxsplit = INT_MAX;
678 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000679 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000680 if (PyString_Check(subobj)) {
681 sub = PyString_AS_STRING(subobj);
682 n = PyString_GET_SIZE(subobj);
683 }
684 else if (PyUnicode_Check(subobj))
685 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
686 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
687 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688 if (n == 0) {
689 PyErr_SetString(PyExc_ValueError, "empty separator");
690 return NULL;
691 }
692
693 list = PyList_New(0);
694 if (list == NULL)
695 return NULL;
696
697 i = j = 0;
698 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000699 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (maxsplit-- <= 0)
701 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000702 item = PyString_FromStringAndSize(s+j, (int)(i-j));
703 if (item == NULL)
704 goto fail;
705 err = PyList_Append(list, item);
706 Py_DECREF(item);
707 if (err < 0)
708 goto fail;
709 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000710 }
711 else
712 i++;
713 }
714 item = PyString_FromStringAndSize(s+j, (int)(len-j));
715 if (item == NULL)
716 goto fail;
717 err = PyList_Append(list, item);
718 Py_DECREF(item);
719 if (err < 0)
720 goto fail;
721
722 return list;
723
724 fail:
725 Py_DECREF(list);
726 return NULL;
727}
728
729
730static char join__doc__[] =
731"S.join(sequence) -> string\n\
732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000733Return a string which is the concatenation of the strings in the\n\
734sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000735
736static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000737string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000738{
739 char *sep = PyString_AS_STRING(self);
740 int seplen = PyString_GET_SIZE(self);
741 PyObject *res = NULL;
742 int reslen = 0;
743 char *p;
744 int seqlen = 0;
745 int sz = 100;
746 int i, slen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000747 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000748
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000749 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000750 return NULL;
751
Barry Warsaw771d0672000-07-11 04:58:12 +0000752 if (!(seq = PySequence_Fast(orig, ""))) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000753 if (PyErr_ExceptionMatches(PyExc_TypeError))
754 PyErr_Format(PyExc_TypeError,
755 "sequence expected, %.80s found",
756 orig->ob_type->tp_name);
757 return NULL;
758 }
Barry Warsaw771d0672000-07-11 04:58:12 +0000759 /* From here on out, errors go through finally: for proper
760 * reference count manipulations.
761 */
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 seqlen = PySequence_Length(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000763 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000764 item = PySequence_Fast_GET_ITEM(seq, 0);
765 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000766 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000767 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000768 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000769
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000770 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
Barry Warsaw771d0672000-07-11 04:58:12 +0000771 goto finally;
772
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000773 p = PyString_AsString(res);
774
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000775 for (i = 0; i < seqlen; i++) {
776 item = PySequence_Fast_GET_ITEM(seq, i);
777 if (!PyString_Check(item)){
778 if (PyUnicode_Check(item)) {
779 Py_DECREF(res);
Barry Warsaw771d0672000-07-11 04:58:12 +0000780 Py_DECREF(seq);
781 return PyUnicode_Join((PyObject *)self, seq);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000782 }
783 PyErr_Format(PyExc_TypeError,
784 "sequence item %i: expected string, %.80s found",
785 i, item->ob_type->tp_name);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000786 goto finally;
787 }
788 slen = PyString_GET_SIZE(item);
789 while (reslen + slen + seplen >= sz) {
790 if (_PyString_Resize(&res, sz*2)) {
Barry Warsawbf325832000-03-06 14:52:18 +0000791 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000792 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000793 sz *= 2;
794 p = PyString_AsString(res) + reslen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000795 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000796 if (i > 0) {
797 memcpy(p, sep, seplen);
798 p += seplen;
799 reslen += seplen;
800 }
801 memcpy(p, PyString_AS_STRING(item), slen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000802 p += slen;
803 reslen += slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000804 }
805 if (_PyString_Resize(&res, reslen))
806 goto finally;
Jeremy Hylton49048292000-07-11 03:28:17 +0000807 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000808 return res;
809
810 finally:
Jeremy Hylton49048292000-07-11 03:28:17 +0000811 Py_DECREF(seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000812 Py_XDECREF(res);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000813 return NULL;
814}
815
816
817
818static long
Fred Drakeba096332000-07-09 07:04:36 +0000819string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000820{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000821 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000822 int len = PyString_GET_SIZE(self);
823 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000824 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825
Guido van Rossumc6821402000-05-08 14:08:05 +0000826 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
827 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000828 return -2;
829 if (PyString_Check(subobj)) {
830 sub = PyString_AS_STRING(subobj);
831 n = PyString_GET_SIZE(subobj);
832 }
833 else if (PyUnicode_Check(subobj))
834 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
835 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000836 return -2;
837
838 if (last > len)
839 last = len;
840 if (last < 0)
841 last += len;
842 if (last < 0)
843 last = 0;
844 if (i < 0)
845 i += len;
846 if (i < 0)
847 i = 0;
848
Guido van Rossum4c08d552000-03-10 22:55:18 +0000849 if (dir > 0) {
850 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000851 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000852 last -= n;
853 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000854 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000855 return (long)i;
856 }
857 else {
858 int j;
859
860 if (n == 0 && i <= last)
861 return (long)last;
862 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000863 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000864 return (long)j;
865 }
866
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000867 return -1;
868}
869
870
871static char find__doc__[] =
872"S.find(sub [,start [,end]]) -> int\n\
873\n\
874Return the lowest index in S where substring sub is found,\n\
875such that sub is contained within s[start,end]. Optional\n\
876arguments start and end are interpreted as in slice notation.\n\
877\n\
878Return -1 on failure.";
879
880static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000881string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000882{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000883 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000884 if (result == -2)
885 return NULL;
886 return PyInt_FromLong(result);
887}
888
889
890static char index__doc__[] =
891"S.index(sub [,start [,end]]) -> int\n\
892\n\
893Like S.find() but raise ValueError when the substring is not found.";
894
895static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000896string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000897{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000898 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000899 if (result == -2)
900 return NULL;
901 if (result == -1) {
902 PyErr_SetString(PyExc_ValueError,
903 "substring not found in string.index");
904 return NULL;
905 }
906 return PyInt_FromLong(result);
907}
908
909
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000910static char rfind__doc__[] =
911"S.rfind(sub [,start [,end]]) -> int\n\
912\n\
913Return the highest index in S where substring sub is found,\n\
914such that sub is contained within s[start,end]. Optional\n\
915arguments start and end are interpreted as in slice notation.\n\
916\n\
917Return -1 on failure.";
918
919static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000920string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000921{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000922 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000923 if (result == -2)
924 return NULL;
925 return PyInt_FromLong(result);
926}
927
928
929static char rindex__doc__[] =
930"S.rindex(sub [,start [,end]]) -> int\n\
931\n\
932Like S.rfind() but raise ValueError when the substring is not found.";
933
934static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000935string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000936{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000937 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000938 if (result == -2)
939 return NULL;
940 if (result == -1) {
941 PyErr_SetString(PyExc_ValueError,
942 "substring not found in string.rindex");
943 return NULL;
944 }
945 return PyInt_FromLong(result);
946}
947
948
949static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000950do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000951{
952 char *s = PyString_AS_STRING(self);
953 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000954
Guido van Rossum43713e52000-02-29 13:59:29 +0000955 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000956 return NULL;
957
958 i = 0;
959 if (striptype != RIGHTSTRIP) {
960 while (i < len && isspace(Py_CHARMASK(s[i]))) {
961 i++;
962 }
963 }
964
965 j = len;
966 if (striptype != LEFTSTRIP) {
967 do {
968 j--;
969 } while (j >= i && isspace(Py_CHARMASK(s[j])));
970 j++;
971 }
972
973 if (i == 0 && j == len) {
974 Py_INCREF(self);
975 return (PyObject*)self;
976 }
977 else
978 return PyString_FromStringAndSize(s+i, j-i);
979}
980
981
982static char strip__doc__[] =
983"S.strip() -> string\n\
984\n\
985Return a copy of the string S with leading and trailing\n\
986whitespace removed.";
987
988static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000989string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000990{
991 return do_strip(self, args, BOTHSTRIP);
992}
993
994
995static char lstrip__doc__[] =
996"S.lstrip() -> string\n\
997\n\
998Return a copy of the string S with leading whitespace removed.";
999
1000static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001001string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001002{
1003 return do_strip(self, args, LEFTSTRIP);
1004}
1005
1006
1007static char rstrip__doc__[] =
1008"S.rstrip() -> string\n\
1009\n\
1010Return a copy of the string S with trailing whitespace removed.";
1011
1012static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001013string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001014{
1015 return do_strip(self, args, RIGHTSTRIP);
1016}
1017
1018
1019static char lower__doc__[] =
1020"S.lower() -> string\n\
1021\n\
1022Return a copy of the string S converted to lowercase.";
1023
1024static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001025string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001026{
1027 char *s = PyString_AS_STRING(self), *s_new;
1028 int i, n = PyString_GET_SIZE(self);
1029 PyObject *new;
1030
Guido van Rossum43713e52000-02-29 13:59:29 +00001031 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001032 return NULL;
1033 new = PyString_FromStringAndSize(NULL, n);
1034 if (new == NULL)
1035 return NULL;
1036 s_new = PyString_AsString(new);
1037 for (i = 0; i < n; i++) {
1038 int c = Py_CHARMASK(*s++);
1039 if (isupper(c)) {
1040 *s_new = tolower(c);
1041 } else
1042 *s_new = c;
1043 s_new++;
1044 }
1045 return new;
1046}
1047
1048
1049static char upper__doc__[] =
1050"S.upper() -> string\n\
1051\n\
1052Return a copy of the string S converted to uppercase.";
1053
1054static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001055string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001056{
1057 char *s = PyString_AS_STRING(self), *s_new;
1058 int i, n = PyString_GET_SIZE(self);
1059 PyObject *new;
1060
Guido van Rossum43713e52000-02-29 13:59:29 +00001061 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001062 return NULL;
1063 new = PyString_FromStringAndSize(NULL, n);
1064 if (new == NULL)
1065 return NULL;
1066 s_new = PyString_AsString(new);
1067 for (i = 0; i < n; i++) {
1068 int c = Py_CHARMASK(*s++);
1069 if (islower(c)) {
1070 *s_new = toupper(c);
1071 } else
1072 *s_new = c;
1073 s_new++;
1074 }
1075 return new;
1076}
1077
1078
Guido van Rossum4c08d552000-03-10 22:55:18 +00001079static char title__doc__[] =
1080"S.title() -> string\n\
1081\n\
1082Return a titlecased version of S, i.e. words start with uppercase\n\
1083characters, all remaining cased characters have lowercase.";
1084
1085static PyObject*
1086string_title(PyUnicodeObject *self, PyObject *args)
1087{
1088 char *s = PyString_AS_STRING(self), *s_new;
1089 int i, n = PyString_GET_SIZE(self);
1090 int previous_is_cased = 0;
1091 PyObject *new;
1092
1093 if (!PyArg_ParseTuple(args, ":title"))
1094 return NULL;
1095 new = PyString_FromStringAndSize(NULL, n);
1096 if (new == NULL)
1097 return NULL;
1098 s_new = PyString_AsString(new);
1099 for (i = 0; i < n; i++) {
1100 int c = Py_CHARMASK(*s++);
1101 if (islower(c)) {
1102 if (!previous_is_cased)
1103 c = toupper(c);
1104 previous_is_cased = 1;
1105 } else if (isupper(c)) {
1106 if (previous_is_cased)
1107 c = tolower(c);
1108 previous_is_cased = 1;
1109 } else
1110 previous_is_cased = 0;
1111 *s_new++ = c;
1112 }
1113 return new;
1114}
1115
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116static char capitalize__doc__[] =
1117"S.capitalize() -> string\n\
1118\n\
1119Return a copy of the string S with only its first character\n\
1120capitalized.";
1121
1122static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001123string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001124{
1125 char *s = PyString_AS_STRING(self), *s_new;
1126 int i, n = PyString_GET_SIZE(self);
1127 PyObject *new;
1128
Guido van Rossum43713e52000-02-29 13:59:29 +00001129 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001130 return NULL;
1131 new = PyString_FromStringAndSize(NULL, n);
1132 if (new == NULL)
1133 return NULL;
1134 s_new = PyString_AsString(new);
1135 if (0 < n) {
1136 int c = Py_CHARMASK(*s++);
1137 if (islower(c))
1138 *s_new = toupper(c);
1139 else
1140 *s_new = c;
1141 s_new++;
1142 }
1143 for (i = 1; i < n; i++) {
1144 int c = Py_CHARMASK(*s++);
1145 if (isupper(c))
1146 *s_new = tolower(c);
1147 else
1148 *s_new = c;
1149 s_new++;
1150 }
1151 return new;
1152}
1153
1154
1155static char count__doc__[] =
1156"S.count(sub[, start[, end]]) -> int\n\
1157\n\
1158Return the number of occurrences of substring sub in string\n\
1159S[start:end]. Optional arguments start and end are\n\
1160interpreted as in slice notation.";
1161
1162static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001163string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001164{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001165 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001166 int len = PyString_GET_SIZE(self), n;
1167 int i = 0, last = INT_MAX;
1168 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001169 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001170
Guido van Rossumc6821402000-05-08 14:08:05 +00001171 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1172 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001173 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001174
Guido van Rossum4c08d552000-03-10 22:55:18 +00001175 if (PyString_Check(subobj)) {
1176 sub = PyString_AS_STRING(subobj);
1177 n = PyString_GET_SIZE(subobj);
1178 }
1179 else if (PyUnicode_Check(subobj))
1180 return PyInt_FromLong(
1181 PyUnicode_Count((PyObject *)self, subobj, i, last));
1182 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1183 return NULL;
1184
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001185 if (last > len)
1186 last = len;
1187 if (last < 0)
1188 last += len;
1189 if (last < 0)
1190 last = 0;
1191 if (i < 0)
1192 i += len;
1193 if (i < 0)
1194 i = 0;
1195 m = last + 1 - n;
1196 if (n == 0)
1197 return PyInt_FromLong((long) (m-i));
1198
1199 r = 0;
1200 while (i < m) {
1201 if (!memcmp(s+i, sub, n)) {
1202 r++;
1203 i += n;
1204 } else {
1205 i++;
1206 }
1207 }
1208 return PyInt_FromLong((long) r);
1209}
1210
1211
1212static char swapcase__doc__[] =
1213"S.swapcase() -> string\n\
1214\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001215Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001216converted to lowercase and vice versa.";
1217
1218static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001219string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001220{
1221 char *s = PyString_AS_STRING(self), *s_new;
1222 int i, n = PyString_GET_SIZE(self);
1223 PyObject *new;
1224
Guido van Rossum43713e52000-02-29 13:59:29 +00001225 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001226 return NULL;
1227 new = PyString_FromStringAndSize(NULL, n);
1228 if (new == NULL)
1229 return NULL;
1230 s_new = PyString_AsString(new);
1231 for (i = 0; i < n; i++) {
1232 int c = Py_CHARMASK(*s++);
1233 if (islower(c)) {
1234 *s_new = toupper(c);
1235 }
1236 else if (isupper(c)) {
1237 *s_new = tolower(c);
1238 }
1239 else
1240 *s_new = c;
1241 s_new++;
1242 }
1243 return new;
1244}
1245
1246
1247static char translate__doc__[] =
1248"S.translate(table [,deletechars]) -> string\n\
1249\n\
1250Return a copy of the string S, where all characters occurring\n\
1251in the optional argument deletechars are removed, and the\n\
1252remaining characters have been mapped through the given\n\
1253translation table, which must be a string of length 256.";
1254
1255static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001256string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001257{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001258 register char *input, *output;
1259 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260 register int i, c, changed = 0;
1261 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001262 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001263 int inlen, tablen, dellen = 0;
1264 PyObject *result;
1265 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001266 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001267
Guido van Rossum4c08d552000-03-10 22:55:18 +00001268 if (!PyArg_ParseTuple(args, "O|O:translate",
1269 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001270 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001271
1272 if (PyString_Check(tableobj)) {
1273 table1 = PyString_AS_STRING(tableobj);
1274 tablen = PyString_GET_SIZE(tableobj);
1275 }
1276 else if (PyUnicode_Check(tableobj)) {
1277 /* Unicode .translate() does not support the deletechars
1278 parameter; instead a mapping to None will cause characters
1279 to be deleted. */
1280 if (delobj != NULL) {
1281 PyErr_SetString(PyExc_TypeError,
1282 "deletions are implemented differently for unicode");
1283 return NULL;
1284 }
1285 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1286 }
1287 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001289
1290 if (delobj != NULL) {
1291 if (PyString_Check(delobj)) {
1292 del_table = PyString_AS_STRING(delobj);
1293 dellen = PyString_GET_SIZE(delobj);
1294 }
1295 else if (PyUnicode_Check(delobj)) {
1296 PyErr_SetString(PyExc_TypeError,
1297 "deletions are implemented differently for unicode");
1298 return NULL;
1299 }
1300 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1301 return NULL;
1302
1303 if (tablen != 256) {
1304 PyErr_SetString(PyExc_ValueError,
1305 "translation table must be 256 characters long");
1306 return NULL;
1307 }
1308 }
1309 else {
1310 del_table = NULL;
1311 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001312 }
1313
1314 table = table1;
1315 inlen = PyString_Size(input_obj);
1316 result = PyString_FromStringAndSize((char *)NULL, inlen);
1317 if (result == NULL)
1318 return NULL;
1319 output_start = output = PyString_AsString(result);
1320 input = PyString_AsString(input_obj);
1321
1322 if (dellen == 0) {
1323 /* If no deletions are required, use faster code */
1324 for (i = inlen; --i >= 0; ) {
1325 c = Py_CHARMASK(*input++);
1326 if (Py_CHARMASK((*output++ = table[c])) != c)
1327 changed = 1;
1328 }
1329 if (changed)
1330 return result;
1331 Py_DECREF(result);
1332 Py_INCREF(input_obj);
1333 return input_obj;
1334 }
1335
1336 for (i = 0; i < 256; i++)
1337 trans_table[i] = Py_CHARMASK(table[i]);
1338
1339 for (i = 0; i < dellen; i++)
1340 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1341
1342 for (i = inlen; --i >= 0; ) {
1343 c = Py_CHARMASK(*input++);
1344 if (trans_table[c] != -1)
1345 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1346 continue;
1347 changed = 1;
1348 }
1349 if (!changed) {
1350 Py_DECREF(result);
1351 Py_INCREF(input_obj);
1352 return input_obj;
1353 }
1354 /* Fix the size of the resulting string */
1355 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1356 return NULL;
1357 return result;
1358}
1359
1360
1361/* What follows is used for implementing replace(). Perry Stoll. */
1362
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM and returns its offset from MEM, or -1 when there is
  no occurrence.  A pattern longer than the memory block never matches,
  so -1 is returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Highest offset at which the pattern could still fit. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1388
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   left to right and resuming right after each match:
   mem=1111  and pat=11 gives 2,
   mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		int consumed;
		int at = mymemfind(mem, len, pat, pat_len);

		if (at == -1)
			break;
		/* Skip everything up to and including this match. */
		consumed = at + pat_len;
		mem += consumed;
		len -= consumed;
	}
	return hits;
}
1412
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "replace all".

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR to replace, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Refuse results whose length does not fit in an int; the caller
	   reports a NULL return as a memory error. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* A zero-byte request may yield NULL even when memory is
	   available, which would be mistaken for a failure; always ask
	   for at least one byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
1490
1491
1492static char replace__doc__[] =
1493"S.replace (old, new[, maxsplit]) -> string\n\
1494\n\
1495Return a copy of string S with all occurrences of substring\n\
1496old replaced by new. If the optional argument maxsplit is\n\
1497given, only the first maxsplit occurrences are replaced.";
1498
1499static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001500string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001501{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001502 const char *str = PyString_AS_STRING(self), *sub, *repl;
1503 char *new_s;
1504 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1505 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001507 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001508
Guido van Rossum4c08d552000-03-10 22:55:18 +00001509 if (!PyArg_ParseTuple(args, "OO|i:replace",
1510 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001512
1513 if (PyString_Check(subobj)) {
1514 sub = PyString_AS_STRING(subobj);
1515 sub_len = PyString_GET_SIZE(subobj);
1516 }
1517 else if (PyUnicode_Check(subobj))
1518 return PyUnicode_Replace((PyObject *)self,
1519 subobj, replobj, count);
1520 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1521 return NULL;
1522
1523 if (PyString_Check(replobj)) {
1524 repl = PyString_AS_STRING(replobj);
1525 repl_len = PyString_GET_SIZE(replobj);
1526 }
1527 else if (PyUnicode_Check(replobj))
1528 return PyUnicode_Replace((PyObject *)self,
1529 subobj, replobj, count);
1530 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1531 return NULL;
1532
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001533 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001534 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001535 return NULL;
1536 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001537 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001538 if (new_s == NULL) {
1539 PyErr_NoMemory();
1540 return NULL;
1541 }
1542 if (out_len == -1) {
1543 /* we're returning another reference to self */
1544 new = (PyObject*)self;
1545 Py_INCREF(new);
1546 }
1547 else {
1548 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001549 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001550 }
1551 return new;
1552}
1553
1554
1555static char startswith__doc__[] =
1556"S.startswith(prefix[, start[, end]]) -> int\n\
1557\n\
1558Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1559optional start, test S beginning at that position. With optional end, stop\n\
1560comparing S at that position.";
1561
1562static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001563string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001565 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001567 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568 int plen;
1569 int start = 0;
1570 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001571 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572
Guido van Rossumc6821402000-05-08 14:08:05 +00001573 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1574 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 return NULL;
1576 if (PyString_Check(subobj)) {
1577 prefix = PyString_AS_STRING(subobj);
1578 plen = PyString_GET_SIZE(subobj);
1579 }
1580 else if (PyUnicode_Check(subobj))
1581 return PyInt_FromLong(
1582 PyUnicode_Tailmatch((PyObject *)self,
1583 subobj, start, end, -1));
1584 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 return NULL;
1586
1587 /* adopt Java semantics for index out of range. it is legal for
1588 * offset to be == plen, but this only returns true if prefix is
1589 * the empty string.
1590 */
1591 if (start < 0 || start+plen > len)
1592 return PyInt_FromLong(0);
1593
1594 if (!memcmp(str+start, prefix, plen)) {
1595 /* did the match end after the specified end? */
1596 if (end < 0)
1597 return PyInt_FromLong(1);
1598 else if (end - start < plen)
1599 return PyInt_FromLong(0);
1600 else
1601 return PyInt_FromLong(1);
1602 }
1603 else return PyInt_FromLong(0);
1604}
1605
1606
1607static char endswith__doc__[] =
1608"S.endswith(suffix[, start[, end]]) -> int\n\
1609\n\
1610Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1611optional start, test S beginning at that position. With optional end, stop\n\
1612comparing S at that position.";
1613
1614static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001615string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001616{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001617 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001619 const char* suffix;
1620 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621 int start = 0;
1622 int end = -1;
1623 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001624 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625
Guido van Rossumc6821402000-05-08 14:08:05 +00001626 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1627 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001628 return NULL;
1629 if (PyString_Check(subobj)) {
1630 suffix = PyString_AS_STRING(subobj);
1631 slen = PyString_GET_SIZE(subobj);
1632 }
1633 else if (PyUnicode_Check(subobj))
1634 return PyInt_FromLong(
1635 PyUnicode_Tailmatch((PyObject *)self,
1636 subobj, start, end, +1));
1637 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 return NULL;
1639
Guido van Rossum4c08d552000-03-10 22:55:18 +00001640 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 return PyInt_FromLong(0);
1642
1643 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001644 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645
Guido van Rossum4c08d552000-03-10 22:55:18 +00001646 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647 return PyInt_FromLong(1);
1648 else return PyInt_FromLong(0);
1649}
1650
1651
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001652static char encode__doc__[] =
1653"S.encode([encoding[,errors]]) -> string\n\
1654\n\
1655Return an encoded string version of S. Default encoding is the current\n\
1656default string encoding. errors may be given to set a different error\n\
1657handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1658a ValueError. Other possible values are 'ignore' and 'replace'.";
1659
1660static PyObject *
1661string_encode(PyStringObject *self, PyObject *args)
1662{
1663 char *encoding = NULL;
1664 char *errors = NULL;
1665 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1666 return NULL;
1667 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1668}
1669
1670
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671static char expandtabs__doc__[] =
1672"S.expandtabs([tabsize]) -> string\n\
1673\n\
1674Return a copy of S where all tab characters are expanded using spaces.\n\
1675If tabsize is not given, a tab size of 8 characters is assumed.";
1676
1677static PyObject*
1678string_expandtabs(PyStringObject *self, PyObject *args)
1679{
1680 const char *e, *p;
1681 char *q;
1682 int i, j;
1683 PyObject *u;
1684 int tabsize = 8;
1685
1686 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1687 return NULL;
1688
1689 /* First pass: determine size of ouput string */
1690 i = j = 0;
1691 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1692 for (p = PyString_AS_STRING(self); p < e; p++)
1693 if (*p == '\t') {
1694 if (tabsize > 0)
1695 j += tabsize - (j % tabsize);
1696 }
1697 else {
1698 j++;
1699 if (*p == '\n' || *p == '\r') {
1700 i += j;
1701 j = 0;
1702 }
1703 }
1704
1705 /* Second pass: create output string and fill it */
1706 u = PyString_FromStringAndSize(NULL, i + j);
1707 if (!u)
1708 return NULL;
1709
1710 j = 0;
1711 q = PyString_AS_STRING(u);
1712
1713 for (p = PyString_AS_STRING(self); p < e; p++)
1714 if (*p == '\t') {
1715 if (tabsize > 0) {
1716 i = tabsize - (j % tabsize);
1717 j += i;
1718 while (i--)
1719 *q++ = ' ';
1720 }
1721 }
1722 else {
1723 j++;
1724 *q++ = *p;
1725 if (*p == '\n' || *p == '\r')
1726 j = 0;
1727 }
1728
1729 return u;
1730}
1731
1732static
1733PyObject *pad(PyStringObject *self,
1734 int left,
1735 int right,
1736 char fill)
1737{
1738 PyObject *u;
1739
1740 if (left < 0)
1741 left = 0;
1742 if (right < 0)
1743 right = 0;
1744
1745 if (left == 0 && right == 0) {
1746 Py_INCREF(self);
1747 return (PyObject *)self;
1748 }
1749
1750 u = PyString_FromStringAndSize(NULL,
1751 left + PyString_GET_SIZE(self) + right);
1752 if (u) {
1753 if (left)
1754 memset(PyString_AS_STRING(u), fill, left);
1755 memcpy(PyString_AS_STRING(u) + left,
1756 PyString_AS_STRING(self),
1757 PyString_GET_SIZE(self));
1758 if (right)
1759 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1760 fill, right);
1761 }
1762
1763 return u;
1764}
1765
1766static char ljust__doc__[] =
1767"S.ljust(width) -> string\n\
1768\n\
1769Return S left justified in a string of length width. Padding is\n\
1770done using spaces.";
1771
1772static PyObject *
1773string_ljust(PyStringObject *self, PyObject *args)
1774{
1775 int width;
1776 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1777 return NULL;
1778
1779 if (PyString_GET_SIZE(self) >= width) {
1780 Py_INCREF(self);
1781 return (PyObject*) self;
1782 }
1783
1784 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1785}
1786
1787
1788static char rjust__doc__[] =
1789"S.rjust(width) -> string\n\
1790\n\
1791Return S right justified in a string of length width. Padding is\n\
1792done using spaces.";
1793
1794static PyObject *
1795string_rjust(PyStringObject *self, PyObject *args)
1796{
1797 int width;
1798 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1799 return NULL;
1800
1801 if (PyString_GET_SIZE(self) >= width) {
1802 Py_INCREF(self);
1803 return (PyObject*) self;
1804 }
1805
1806 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1807}
1808
1809
1810static char center__doc__[] =
1811"S.center(width) -> string\n\
1812\n\
1813Return S centered in a string of length width. Padding is done\n\
1814using spaces.";
1815
1816static PyObject *
1817string_center(PyStringObject *self, PyObject *args)
1818{
1819 int marg, left;
1820 int width;
1821
1822 if (!PyArg_ParseTuple(args, "i:center", &width))
1823 return NULL;
1824
1825 if (PyString_GET_SIZE(self) >= width) {
1826 Py_INCREF(self);
1827 return (PyObject*) self;
1828 }
1829
1830 marg = width - PyString_GET_SIZE(self);
1831 left = marg / 2 + (marg & width & 1);
1832
1833 return pad(self, left, marg - left, ' ');
1834}
1835
#if 0
static char zfill__doc__[] =
	"S.zfill(width) -> string\n"
	"\n"
	"Pad a numeric string x with zeros on the left, to fill a field\n"
	"of the specified width. The string x is never truncated.";

/* Implements S.zfill() (currently compiled out): left-pads with '0'
   and, if the original text began with a sign, moves that sign to the
   front of the padded result. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int fill;
	int size = PyString_GET_SIZE(self);
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= size) {
		Py_INCREF(self);
		return (PyObject *)self;
	}

	fill = width - size;
	padded = pad(self, fill, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[fill] is the first character of the original text. */
	buf = PyString_AS_STRING(padded);
	if (buf[fill] == '+' || buf[fill] == '-') {
		/* move sign to beginning of string */
		buf[0] = buf[fill];
		buf[fill] = '0';
	}

	return padded;
}
#endif
1875
1876static char isspace__doc__[] =
1877"S.isspace() -> int\n\
1878\n\
1879Return 1 if there are only whitespace characters in S,\n\
18800 otherwise.";
1881
1882static PyObject*
1883string_isspace(PyStringObject *self, PyObject *args)
1884{
Fred Drakeba096332000-07-09 07:04:36 +00001885 register const unsigned char *p
1886 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001887 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001888
1889 if (!PyArg_NoArgs(args))
1890 return NULL;
1891
1892 /* Shortcut for single character strings */
1893 if (PyString_GET_SIZE(self) == 1 &&
1894 isspace(*p))
1895 return PyInt_FromLong(1);
1896
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001897 /* Special case for empty strings */
1898 if (PyString_GET_SIZE(self) == 0)
1899 return PyInt_FromLong(0);
1900
Guido van Rossum4c08d552000-03-10 22:55:18 +00001901 e = p + PyString_GET_SIZE(self);
1902 for (; p < e; p++) {
1903 if (!isspace(*p))
1904 return PyInt_FromLong(0);
1905 }
1906 return PyInt_FromLong(1);
1907}
1908
1909
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001910static char isalpha__doc__[] =
1911"S.isalpha() -> int\n\
1912\n\
1913Return 1 if all characters in S are alphabetic\n\
1914and there is at least one character in S, 0 otherwise.";
1915
1916static PyObject*
1917string_isalpha(PyUnicodeObject *self, PyObject *args)
1918{
Fred Drakeba096332000-07-09 07:04:36 +00001919 register const unsigned char *p
1920 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001921 register const unsigned char *e;
1922
1923 if (!PyArg_NoArgs(args))
1924 return NULL;
1925
1926 /* Shortcut for single character strings */
1927 if (PyString_GET_SIZE(self) == 1 &&
1928 isalpha(*p))
1929 return PyInt_FromLong(1);
1930
1931 /* Special case for empty strings */
1932 if (PyString_GET_SIZE(self) == 0)
1933 return PyInt_FromLong(0);
1934
1935 e = p + PyString_GET_SIZE(self);
1936 for (; p < e; p++) {
1937 if (!isalpha(*p))
1938 return PyInt_FromLong(0);
1939 }
1940 return PyInt_FromLong(1);
1941}
1942
1943
1944static char isalnum__doc__[] =
1945"S.isalnum() -> int\n\
1946\n\
1947Return 1 if all characters in S are alphanumeric\n\
1948and there is at least one character in S, 0 otherwise.";
1949
1950static PyObject*
1951string_isalnum(PyUnicodeObject *self, PyObject *args)
1952{
Fred Drakeba096332000-07-09 07:04:36 +00001953 register const unsigned char *p
1954 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001955 register const unsigned char *e;
1956
1957 if (!PyArg_NoArgs(args))
1958 return NULL;
1959
1960 /* Shortcut for single character strings */
1961 if (PyString_GET_SIZE(self) == 1 &&
1962 isalnum(*p))
1963 return PyInt_FromLong(1);
1964
1965 /* Special case for empty strings */
1966 if (PyString_GET_SIZE(self) == 0)
1967 return PyInt_FromLong(0);
1968
1969 e = p + PyString_GET_SIZE(self);
1970 for (; p < e; p++) {
1971 if (!isalnum(*p))
1972 return PyInt_FromLong(0);
1973 }
1974 return PyInt_FromLong(1);
1975}
1976
1977
Guido van Rossum4c08d552000-03-10 22:55:18 +00001978static char isdigit__doc__[] =
1979"S.isdigit() -> int\n\
1980\n\
1981Return 1 if there are only digit characters in S,\n\
19820 otherwise.";
1983
1984static PyObject*
1985string_isdigit(PyStringObject *self, PyObject *args)
1986{
Fred Drakeba096332000-07-09 07:04:36 +00001987 register const unsigned char *p
1988 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001989 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001990
1991 if (!PyArg_NoArgs(args))
1992 return NULL;
1993
1994 /* Shortcut for single character strings */
1995 if (PyString_GET_SIZE(self) == 1 &&
1996 isdigit(*p))
1997 return PyInt_FromLong(1);
1998
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001999 /* Special case for empty strings */
2000 if (PyString_GET_SIZE(self) == 0)
2001 return PyInt_FromLong(0);
2002
Guido van Rossum4c08d552000-03-10 22:55:18 +00002003 e = p + PyString_GET_SIZE(self);
2004 for (; p < e; p++) {
2005 if (!isdigit(*p))
2006 return PyInt_FromLong(0);
2007 }
2008 return PyInt_FromLong(1);
2009}
2010
2011
2012static char islower__doc__[] =
2013"S.islower() -> int\n\
2014\n\
2015Return 1 if all cased characters in S are lowercase and there is\n\
2016at least one cased character in S, 0 otherwise.";
2017
2018static PyObject*
2019string_islower(PyStringObject *self, PyObject *args)
2020{
Fred Drakeba096332000-07-09 07:04:36 +00002021 register const unsigned char *p
2022 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002023 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002024 int cased;
2025
2026 if (!PyArg_NoArgs(args))
2027 return NULL;
2028
2029 /* Shortcut for single character strings */
2030 if (PyString_GET_SIZE(self) == 1)
2031 return PyInt_FromLong(islower(*p) != 0);
2032
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002033 /* Special case for empty strings */
2034 if (PyString_GET_SIZE(self) == 0)
2035 return PyInt_FromLong(0);
2036
Guido van Rossum4c08d552000-03-10 22:55:18 +00002037 e = p + PyString_GET_SIZE(self);
2038 cased = 0;
2039 for (; p < e; p++) {
2040 if (isupper(*p))
2041 return PyInt_FromLong(0);
2042 else if (!cased && islower(*p))
2043 cased = 1;
2044 }
2045 return PyInt_FromLong(cased);
2046}
2047
2048
2049static char isupper__doc__[] =
2050"S.isupper() -> int\n\
2051\n\
2052Return 1 if all cased characters in S are uppercase and there is\n\
2053at least one cased character in S, 0 otherwise.";
2054
2055static PyObject*
2056string_isupper(PyStringObject *self, PyObject *args)
2057{
Fred Drakeba096332000-07-09 07:04:36 +00002058 register const unsigned char *p
2059 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002060 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061 int cased;
2062
2063 if (!PyArg_NoArgs(args))
2064 return NULL;
2065
2066 /* Shortcut for single character strings */
2067 if (PyString_GET_SIZE(self) == 1)
2068 return PyInt_FromLong(isupper(*p) != 0);
2069
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002070 /* Special case for empty strings */
2071 if (PyString_GET_SIZE(self) == 0)
2072 return PyInt_FromLong(0);
2073
Guido van Rossum4c08d552000-03-10 22:55:18 +00002074 e = p + PyString_GET_SIZE(self);
2075 cased = 0;
2076 for (; p < e; p++) {
2077 if (islower(*p))
2078 return PyInt_FromLong(0);
2079 else if (!cased && isupper(*p))
2080 cased = 1;
2081 }
2082 return PyInt_FromLong(cased);
2083}
2084
2085
2086static char istitle__doc__[] =
2087"S.istitle() -> int\n\
2088\n\
2089Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2090may only follow uncased characters and lowercase characters only cased\n\
2091ones. Return 0 otherwise.";
2092
2093static PyObject*
2094string_istitle(PyStringObject *self, PyObject *args)
2095{
Fred Drakeba096332000-07-09 07:04:36 +00002096 register const unsigned char *p
2097 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002098 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099 int cased, previous_is_cased;
2100
2101 if (!PyArg_NoArgs(args))
2102 return NULL;
2103
2104 /* Shortcut for single character strings */
2105 if (PyString_GET_SIZE(self) == 1)
2106 return PyInt_FromLong(isupper(*p) != 0);
2107
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002108 /* Special case for empty strings */
2109 if (PyString_GET_SIZE(self) == 0)
2110 return PyInt_FromLong(0);
2111
Guido van Rossum4c08d552000-03-10 22:55:18 +00002112 e = p + PyString_GET_SIZE(self);
2113 cased = 0;
2114 previous_is_cased = 0;
2115 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002116 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002117
2118 if (isupper(ch)) {
2119 if (previous_is_cased)
2120 return PyInt_FromLong(0);
2121 previous_is_cased = 1;
2122 cased = 1;
2123 }
2124 else if (islower(ch)) {
2125 if (!previous_is_cased)
2126 return PyInt_FromLong(0);
2127 previous_is_cased = 1;
2128 cased = 1;
2129 }
2130 else
2131 previous_is_cased = 0;
2132 }
2133 return PyInt_FromLong(cased);
2134}
2135
2136
2137static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002138"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002139\n\
2140Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002141Line breaks are not included in the resulting list unless keepends\n\
2142is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002143
2144#define SPLIT_APPEND(data, left, right) \
2145 str = PyString_FromStringAndSize(data + left, right - left); \
2146 if (!str) \
2147 goto onError; \
2148 if (PyList_Append(list, str)) { \
2149 Py_DECREF(str); \
2150 goto onError; \
2151 } \
2152 else \
2153 Py_DECREF(str);
2154
2155static PyObject*
2156string_splitlines(PyStringObject *self, PyObject *args)
2157{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158 register int i;
2159 register int j;
2160 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002161 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002162 PyObject *list;
2163 PyObject *str;
2164 char *data;
2165
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002166 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002167 return NULL;
2168
2169 data = PyString_AS_STRING(self);
2170 len = PyString_GET_SIZE(self);
2171
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172 list = PyList_New(0);
2173 if (!list)
2174 goto onError;
2175
2176 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002177 int eol;
2178
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179 /* Find a line and append it */
2180 while (i < len && data[i] != '\n' && data[i] != '\r')
2181 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002182
2183 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002184 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002185 if (i < len) {
2186 if (data[i] == '\r' && i + 1 < len &&
2187 data[i+1] == '\n')
2188 i += 2;
2189 else
2190 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002191 if (keepends)
2192 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002194 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195 j = i;
2196 }
2197 if (j < len) {
2198 SPLIT_APPEND(data, j, len);
2199 }
2200
2201 return list;
2202
2203 onError:
2204 Py_DECREF(list);
2205 return NULL;
2206}
2207
2208#undef SPLIT_APPEND
2209
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210
2211static PyMethodDef
2212string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213 /* Counterparts of the obsolete stropmodule functions; except
2214 string.maketrans(). */
2215 {"join", (PyCFunction)string_join, 1, join__doc__},
2216 {"split", (PyCFunction)string_split, 1, split__doc__},
2217 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2218 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2219 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2220 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2221 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2222 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2223 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002224 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2225 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2227 {"count", (PyCFunction)string_count, 1, count__doc__},
2228 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2229 {"find", (PyCFunction)string_find, 1, find__doc__},
2230 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2233 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2234 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2235 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2237 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2238 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002239 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2240 {"title", (PyCFunction)string_title, 1, title__doc__},
2241 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2242 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2243 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002244 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2246 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2247#if 0
2248 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2249#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250 {NULL, NULL} /* sentinel */
2251};
2252
2253static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002254string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255{
2256 return Py_FindMethod(string_methods, (PyObject*)s, name);
2257}
2258
2259
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002260PyTypeObject PyString_Type = {
2261 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002262 0,
2263 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002264 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002265 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002266 (destructor)string_dealloc, /*tp_dealloc*/
2267 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002269 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002270 (cmpfunc)string_compare, /*tp_compare*/
2271 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002272 0, /*tp_as_number*/
2273 &string_as_sequence, /*tp_as_sequence*/
2274 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002275 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002276 0, /*tp_call*/
2277 0, /*tp_str*/
2278 0, /*tp_getattro*/
2279 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002280 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002281 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002282 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002283};
2284
2285void
Fred Drakeba096332000-07-09 07:04:36 +00002286PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002287{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002288 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002289 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002290 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002291 if (w == NULL || !PyString_Check(*pv)) {
2292 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002293 *pv = NULL;
2294 return;
2295 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002296 v = string_concat((PyStringObject *) *pv, w);
2297 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002298 *pv = v;
2299}
2300
Guido van Rossum013142a1994-08-30 08:19:36 +00002301void
Fred Drakeba096332000-07-09 07:04:36 +00002302PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002303{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002304 PyString_Concat(pv, w);
2305 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002306}
2307
2308
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002309/* The following function breaks the notion that strings are immutable:
2310 it changes the size of a string. We get away with this only if there
2311 is only one module referencing the object. You can also think of it
2312 as creating a new string object and destroying the old one, only
2313 more efficiently. In any case, don't use this if the string may
2314 already be known to some other part of the code... */
2315
2316int
Fred Drakeba096332000-07-09 07:04:36 +00002317_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002318{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002319 register PyObject *v;
2320 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002321 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002322 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002323 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002324 Py_DECREF(v);
2325 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002326 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002327 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002328 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002329#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002330 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002331#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002332 _Py_ForgetReference(v);
2333 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002334 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002335 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002336 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002337 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002338 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002339 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002340 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002341 _Py_NewReference(*pv);
2342 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002343 sv->ob_size = newsize;
2344 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002345 return 0;
2346}
Guido van Rossume5372401993-03-16 12:15:04 +00002347
2348/* Helpers for formatstring */
2349
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002350static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002351getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002352{
2353 int argidx = *p_argidx;
2354 if (argidx < arglen) {
2355 (*p_argidx)++;
2356 if (arglen < 0)
2357 return args;
2358 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002359 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002360 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002361 PyErr_SetString(PyExc_TypeError,
2362 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002363 return NULL;
2364}
2365
/* Bit flags handed to the format helpers below (one bit each);
   F_ALT is the one that makes them emit '#' in the conversion. */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
2371
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002372static int
Fred Drakeba096332000-07-09 07:04:36 +00002373formatfloat(char *buf, size_t buflen, int flags,
2374 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002375{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002376 /* fmt = '%#.' + `prec` + `type`
2377 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002378 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002379 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002380 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002381 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002382 if (prec < 0)
2383 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002384 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2385 type = 'g';
2386 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002387 /* worst case length calc to ensure no buffer overrun:
2388 fmt = %#.<prec>g
2389 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2390 for any double rep.)
2391 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2392 If prec=0 the effective precision is 1 (the leading digit is
2393 always given), therefore increase by one to 10+prec. */
2394 if (buflen <= (size_t)10 + (size_t)prec) {
2395 PyErr_SetString(PyExc_OverflowError,
2396 "formatted float is too long (precision too long?)");
2397 return -1;
2398 }
Guido van Rossume5372401993-03-16 12:15:04 +00002399 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002400 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002401}
2402
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002403static int
Fred Drakeba096332000-07-09 07:04:36 +00002404formatint(char *buf, size_t buflen, int flags,
2405 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002406{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002407 /* fmt = '%#.' + `prec` + 'l' + `type`
2408 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002409 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002410 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002411 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002412 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002413 if (prec < 0)
2414 prec = 1;
2415 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002416 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2417 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2418 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2419 PyErr_SetString(PyExc_OverflowError,
2420 "formatted integer is too long (precision too long?)");
2421 return -1;
2422 }
Guido van Rossume5372401993-03-16 12:15:04 +00002423 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002424 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002425}
2426
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002427static int
Fred Drakeba096332000-07-09 07:04:36 +00002428formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002429{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002430 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002431 if (PyString_Check(v)) {
2432 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002433 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002434 }
2435 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002436 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002437 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002438 }
2439 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002440 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002441}
2442
Guido van Rossum013142a1994-08-30 08:19:36 +00002443
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002444/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2445
2446 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2447 chars are formatted. XXX This is a magic number. Each formatting
2448 routine does bounds checking to ensure no overflow, but a better
2449 solution may be to malloc a buffer of appropriate size for each
2450 format. For now, the current solution is sufficient.
2451*/
/* Parenthesized so the cast cannot combine with neighbouring
   operators at the point of use. */
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002453
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002454PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002455PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002456{
2457 char *fmt, *res;
2458 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002459 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002460 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002461 PyObject *dict = NULL;
2462 if (format == NULL || !PyString_Check(format) || args == NULL) {
2463 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002464 return NULL;
2465 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002466 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002467 fmt = PyString_AsString(format);
2468 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002469 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002470 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002471 if (result == NULL)
2472 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002473 res = PyString_AsString(result);
2474 if (PyTuple_Check(args)) {
2475 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002476 argidx = 0;
2477 }
2478 else {
2479 arglen = -1;
2480 argidx = -2;
2481 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002482 if (args->ob_type->tp_as_mapping)
2483 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002484 while (--fmtcnt >= 0) {
2485 if (*fmt != '%') {
2486 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002487 rescnt = fmtcnt + 100;
2488 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002489 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002490 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002491 res = PyString_AsString(result)
2492 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002493 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002494 }
2495 *res++ = *fmt++;
2496 }
2497 else {
2498 /* Got a format specifier */
2499 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002500 int width = -1;
2501 int prec = -1;
2502 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002503 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002504 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002505 PyObject *v = NULL;
2506 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002507 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002508 int sign;
2509 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002510 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002511 char *fmt_start = fmt;
2512
Guido van Rossumda9c2711996-12-05 21:58:58 +00002513 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002514 if (*fmt == '(') {
2515 char *keystart;
2516 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002517 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002518 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002519
2520 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002521 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002522 "format requires a mapping");
2523 goto error;
2524 }
2525 ++fmt;
2526 --fmtcnt;
2527 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002528 /* Skip over balanced parentheses */
2529 while (pcount > 0 && --fmtcnt >= 0) {
2530 if (*fmt == ')')
2531 --pcount;
2532 else if (*fmt == '(')
2533 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002534 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002535 }
2536 keylen = fmt - keystart - 1;
2537 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002538 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002539 "incomplete format key");
2540 goto error;
2541 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002542 key = PyString_FromStringAndSize(keystart,
2543 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002544 if (key == NULL)
2545 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002546 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002547 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002548 args_owned = 0;
2549 }
2550 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002551 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002552 if (args == NULL) {
2553 goto error;
2554 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002555 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002556 arglen = -1;
2557 argidx = -2;
2558 }
Guido van Rossume5372401993-03-16 12:15:04 +00002559 while (--fmtcnt >= 0) {
2560 switch (c = *fmt++) {
2561 case '-': flags |= F_LJUST; continue;
2562 case '+': flags |= F_SIGN; continue;
2563 case ' ': flags |= F_BLANK; continue;
2564 case '#': flags |= F_ALT; continue;
2565 case '0': flags |= F_ZERO; continue;
2566 }
2567 break;
2568 }
2569 if (c == '*') {
2570 v = getnextarg(args, arglen, &argidx);
2571 if (v == NULL)
2572 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002573 if (!PyInt_Check(v)) {
2574 PyErr_SetString(PyExc_TypeError,
2575 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002576 goto error;
2577 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002578 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002579 if (width < 0) {
2580 flags |= F_LJUST;
2581 width = -width;
2582 }
Guido van Rossume5372401993-03-16 12:15:04 +00002583 if (--fmtcnt >= 0)
2584 c = *fmt++;
2585 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002586 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002587 width = c - '0';
2588 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002589 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002590 if (!isdigit(c))
2591 break;
2592 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002593 PyErr_SetString(
2594 PyExc_ValueError,
2595 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002596 goto error;
2597 }
2598 width = width*10 + (c - '0');
2599 }
2600 }
2601 if (c == '.') {
2602 prec = 0;
2603 if (--fmtcnt >= 0)
2604 c = *fmt++;
2605 if (c == '*') {
2606 v = getnextarg(args, arglen, &argidx);
2607 if (v == NULL)
2608 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002609 if (!PyInt_Check(v)) {
2610 PyErr_SetString(
2611 PyExc_TypeError,
2612 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002613 goto error;
2614 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002615 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002616 if (prec < 0)
2617 prec = 0;
2618 if (--fmtcnt >= 0)
2619 c = *fmt++;
2620 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002621 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002622 prec = c - '0';
2623 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002624 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002625 if (!isdigit(c))
2626 break;
2627 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002628 PyErr_SetString(
2629 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002630 "prec too big");
2631 goto error;
2632 }
2633 prec = prec*10 + (c - '0');
2634 }
2635 }
2636 } /* prec */
2637 if (fmtcnt >= 0) {
2638 if (c == 'h' || c == 'l' || c == 'L') {
2639 size = c;
2640 if (--fmtcnt >= 0)
2641 c = *fmt++;
2642 }
2643 }
2644 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002645 PyErr_SetString(PyExc_ValueError,
2646 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002647 goto error;
2648 }
2649 if (c != '%') {
2650 v = getnextarg(args, arglen, &argidx);
2651 if (v == NULL)
2652 goto error;
2653 }
2654 sign = 0;
2655 fill = ' ';
2656 switch (c) {
2657 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002658 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002659 len = 1;
2660 break;
2661 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002662 case 'r':
2663 if (PyUnicode_Check(v)) {
2664 fmt = fmt_start;
2665 goto unicode;
2666 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002667 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002668 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002669 else
2670 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002671 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002672 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002673 if (!PyString_Check(temp)) {
2674 PyErr_SetString(PyExc_TypeError,
2675 "%s argument has non-string str()");
2676 goto error;
2677 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002678 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002679 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002680 if (prec >= 0 && len > prec)
2681 len = prec;
2682 break;
2683 case 'i':
2684 case 'd':
2685 case 'u':
2686 case 'o':
2687 case 'x':
2688 case 'X':
2689 if (c == 'i')
2690 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002691 pbuf = formatbuf;
2692 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002693 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002694 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002695 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002696 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002697 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002698 if ((flags&F_ALT) &&
2699 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002700 pbuf[0] == '0' && pbuf[1] == c) {
2701 *res++ = *pbuf++;
2702 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002703 rescnt -= 2;
2704 len -= 2;
2705 width -= 2;
2706 if (width < 0)
2707 width = 0;
2708 }
2709 }
Guido van Rossume5372401993-03-16 12:15:04 +00002710 break;
2711 case 'e':
2712 case 'E':
2713 case 'f':
2714 case 'g':
2715 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002716 pbuf = formatbuf;
2717 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002718 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002719 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002720 sign = 1;
2721 if (flags&F_ZERO)
2722 fill = '0';
2723 break;
2724 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002725 pbuf = formatbuf;
2726 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002727 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002728 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002729 break;
2730 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002731 PyErr_Format(PyExc_ValueError,
2732 "unsupported format character '%c' (0x%x)",
2733 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002734 goto error;
2735 }
2736 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002737 if (*pbuf == '-' || *pbuf == '+') {
2738 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002739 len--;
2740 }
2741 else if (flags & F_SIGN)
2742 sign = '+';
2743 else if (flags & F_BLANK)
2744 sign = ' ';
2745 else
2746 sign = '\0';
2747 }
2748 if (width < len)
2749 width = len;
2750 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002751 reslen -= rescnt;
2752 rescnt = width + fmtcnt + 100;
2753 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002754 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002755 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002756 res = PyString_AsString(result)
2757 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002758 }
2759 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002760 if (fill != ' ')
2761 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002762 rescnt--;
2763 if (width > len)
2764 width--;
2765 }
2766 if (width > len && !(flags&F_LJUST)) {
2767 do {
2768 --rescnt;
2769 *res++ = fill;
2770 } while (--width > len);
2771 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002772 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002773 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002774 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002775 res += len;
2776 rescnt -= len;
2777 while (--width >= len) {
2778 --rescnt;
2779 *res++ = ' ';
2780 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002781 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002782 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002783 "not all arguments converted");
2784 goto error;
2785 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002786 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002787 } /* '%' */
2788 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002789 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 PyErr_SetString(PyExc_TypeError,
2791 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002792 goto error;
2793 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002794 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002795 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002796 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002797 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002798 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002799
2800 unicode:
2801 if (args_owned) {
2802 Py_DECREF(args);
2803 args_owned = 0;
2804 }
2805 /* Fiddle args right (remove the first argidx-1 arguments) */
2806 --argidx;
2807 if (PyTuple_Check(orig_args) && argidx > 0) {
2808 PyObject *v;
2809 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2810 v = PyTuple_New(n);
2811 if (v == NULL)
2812 goto error;
2813 while (--n >= 0) {
2814 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2815 Py_INCREF(w);
2816 PyTuple_SET_ITEM(v, n, w);
2817 }
2818 args = v;
2819 } else {
2820 Py_INCREF(orig_args);
2821 args = orig_args;
2822 }
2823 /* Paste rest of format string to what we have of the result
2824 string; we reuse result for this */
2825 rescnt = res - PyString_AS_STRING(result);
2826 fmtcnt = PyString_GET_SIZE(format) - \
2827 (fmt - PyString_AS_STRING(format));
2828 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2829 Py_DECREF(args);
2830 goto error;
2831 }
2832 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2833 format = result;
2834 /* Let Unicode do its magic */
2835 result = PyUnicode_Format(format, args);
2836 Py_DECREF(format);
2837 Py_DECREF(args);
2838 return result;
2839
Guido van Rossume5372401993-03-16 12:15:04 +00002840 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002841 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002842 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002843 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002844 }
Guido van Rossume5372401993-03-16 12:15:04 +00002845 return NULL;
2846}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002847
2848
2849#ifdef INTERN_STRINGS
2850
/* Dictionary of interned strings; each entry is stored with the string
   as both key and value.  Created on first use by
   PyString_InternInPlace(). */
static PyObject *interned;
2852
2853void
Fred Drakeba096332000-07-09 07:04:36 +00002854PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002855{
2856 register PyStringObject *s = (PyStringObject *)(*p);
2857 PyObject *t;
2858 if (s == NULL || !PyString_Check(s))
2859 Py_FatalError("PyString_InternInPlace: strings only please!");
2860 if ((t = s->ob_sinterned) != NULL) {
2861 if (t == (PyObject *)s)
2862 return;
2863 Py_INCREF(t);
2864 *p = t;
2865 Py_DECREF(s);
2866 return;
2867 }
2868 if (interned == NULL) {
2869 interned = PyDict_New();
2870 if (interned == NULL)
2871 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002872 }
2873 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2874 Py_INCREF(t);
2875 *p = s->ob_sinterned = t;
2876 Py_DECREF(s);
2877 return;
2878 }
2879 t = (PyObject *)s;
2880 if (PyDict_SetItem(interned, t, t) == 0) {
2881 s->ob_sinterned = t;
2882 return;
2883 }
2884 PyErr_Clear();
2885}
2886
2887
2888PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002889PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002890{
2891 PyObject *s = PyString_FromString(cp);
2892 if (s == NULL)
2893 return NULL;
2894 PyString_InternInPlace(&s);
2895 return s;
2896}
2897
2898#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002899
2900void
Fred Drakeba096332000-07-09 07:04:36 +00002901PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00002902{
2903 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002904 for (i = 0; i < UCHAR_MAX + 1; i++) {
2905 Py_XDECREF(characters[i]);
2906 characters[i] = NULL;
2907 }
2908#ifndef DONT_SHARE_SHORT_STRINGS
2909 Py_XDECREF(nullstring);
2910 nullstring = NULL;
2911#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002912#ifdef INTERN_STRINGS
2913 if (interned) {
2914 int pos, changed;
2915 PyObject *key, *value;
2916 do {
2917 changed = 0;
2918 pos = 0;
2919 while (PyDict_Next(interned, &pos, &key, &value)) {
2920 if (key->ob_refcnt == 2 && key == value) {
2921 PyDict_DelItem(interned, key);
2922 changed = 1;
2923 }
2924 }
2925 } while (changed);
2926 }
2927#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002928}