blob: 7da58dae8cb45af34d4cf5ac70469e5fe6f642fc [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* String object implementation */
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000014
Guido van Rossum71160aa1997-06-03 18:03:18 +000015#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000016#include <ctype.h>
17
#ifdef COUNT_ALLOCS
/* Allocation statistics: incremented each time the shared empty string
   (null_strings) or a cached one-character string (one_strings) is
   handed out instead of allocating a new object. */
int null_strings;
int one_strings;
#endif
21
Guido van Rossum03093a21994-09-28 15:51:32 +000022#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000023#include <limits.h>
24#else
25#ifndef UCHAR_MAX
26#define UCHAR_MAX 255
27#endif
28#endif
29
Guido van Rossumc0b618a1997-05-02 03:12:38 +000030static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000031#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000032static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000033#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000034
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000050PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000051PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000052{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000071#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
73 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000074 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000075 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079#ifdef CACHE_HASH
80 op->ob_shash = -1;
81#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000082#ifdef INTERN_STRINGS
83 op->ob_sinterned = NULL;
84#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000085 if (str != NULL)
86 memcpy(op->ob_sval, str, size);
87 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 if (size == 0) {
90 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 } else if (size == 1 && str != NULL) {
93 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000103 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000105 if (size > INT_MAX) {
106 PyErr_SetString(PyExc_OverflowError,
107 "string is too long for a Python string");
108 return NULL;
109 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000110#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000111 if (size == 0 && (op = nullstring) != NULL) {
112#ifdef COUNT_ALLOCS
113 null_strings++;
114#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000115 Py_INCREF(op);
116 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 }
118 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
119#ifdef COUNT_ALLOCS
120 one_strings++;
121#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 Py_INCREF(op);
123 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000124 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000125#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126
127 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133#ifdef CACHE_HASH
134 op->ob_shash = -1;
135#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000136#ifdef INTERN_STRINGS
137 op->ob_sinterned = NULL;
138#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000140#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 if (size == 0) {
142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
145 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000148#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000150}
151
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000152PyObject *PyString_Decode(const char *s,
153 int size,
154 const char *encoding,
155 const char *errors)
156{
157 PyObject *buffer = NULL, *str;
158
159 if (encoding == NULL)
160 encoding = PyUnicode_GetDefaultEncoding();
161
162 /* Decode via the codec registry */
163 buffer = PyBuffer_FromMemory((void *)s, size);
164 if (buffer == NULL)
165 goto onError;
166 str = PyCodec_Decode(buffer, encoding, errors);
167 if (str == NULL)
168 goto onError;
169 /* Convert Unicode to a string using the default encoding */
170 if (PyUnicode_Check(str)) {
171 PyObject *temp = str;
172 str = PyUnicode_AsEncodedString(str, NULL, NULL);
173 Py_DECREF(temp);
174 if (str == NULL)
175 goto onError;
176 }
177 if (!PyString_Check(str)) {
178 PyErr_Format(PyExc_TypeError,
179 "decoder did not return an string object (type=%.400s)",
180 str->ob_type->tp_name);
181 Py_DECREF(str);
182 goto onError;
183 }
184 Py_DECREF(buffer);
185 return str;
186
187 onError:
188 Py_XDECREF(buffer);
189 return NULL;
190}
191
192PyObject *PyString_Encode(const char *s,
193 int size,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v, *str;
198
199 str = PyString_FromStringAndSize(s, size);
200 if (str == NULL)
201 return NULL;
202 v = PyString_AsEncodedString(str, encoding, errors);
203 Py_DECREF(str);
204 return v;
205}
206
207PyObject *PyString_AsEncodedString(PyObject *str,
208 const char *encoding,
209 const char *errors)
210{
211 PyObject *v;
212
213 if (!PyString_Check(str)) {
214 PyErr_BadArgument();
215 goto onError;
216 }
217
218 if (encoding == NULL)
219 encoding = PyUnicode_GetDefaultEncoding();
220
221 /* Encode via the codec registry */
222 v = PyCodec_Encode(str, encoding, errors);
223 if (v == NULL)
224 goto onError;
225 /* Convert Unicode to a string using the default encoding */
226 if (PyUnicode_Check(v)) {
227 PyObject *temp = v;
228 v = PyUnicode_AsEncodedString(v, NULL, NULL);
229 Py_DECREF(temp);
230 if (v == NULL)
231 goto onError;
232 }
233 if (!PyString_Check(v)) {
234 PyErr_Format(PyExc_TypeError,
235 "encoder did not return a string object (type=%.400s)",
236 v->ob_type->tp_name);
237 Py_DECREF(v);
238 goto onError;
239 }
240 return v;
241
242 onError:
243 return NULL;
244}
245
Guido van Rossum234f9421993-06-17 12:35:49 +0000246static void
Fred Drakeba096332000-07-09 07:04:36 +0000247string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000248{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000249 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000250}
251
Guido van Rossumd7047b31995-01-02 19:07:15 +0000252int
Fred Drakeba096332000-07-09 07:04:36 +0000253PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000255 if (!PyString_Check(op)) {
256 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000257 return -1;
258 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000259 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260}
261
262/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000263PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000265 if (!PyString_Check(op)) {
266 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 return NULL;
268 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000269 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000270}
271
272/* Methods */
273
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000274static int
Fred Drakeba096332000-07-09 07:04:36 +0000275string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276{
277 int i;
278 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000279 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000280 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000283 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000285
286 /* figure out which quote to use; single is prefered */
287 quote = '\'';
288 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
289 quote = '"';
290
291 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000292 for (i = 0; i < op->ob_size; i++) {
293 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000294 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000295 fprintf(fp, "\\%c", c);
296 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000297 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000299 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000301 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000302 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303}
304
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000305static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000306string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000308 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
309 PyObject *v;
310 if (newsize > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "string is too large to make repr");
313 }
314 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000315 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000317 }
318 else {
319 register int i;
320 register char c;
321 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000322 int quote;
323
324 /* figure out which quote to use; single is prefered */
325 quote = '\'';
326 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
327 quote = '"';
328
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000329 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000330 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000331 for (i = 0; i < op->ob_size; i++) {
332 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 *p++ = '\\', *p++ = c;
335 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 while (*p != '\0')
338 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339 }
340 else
341 *p++ = c;
342 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000343 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 _PyString_Resize(
346 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000347 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349}
350
351static int
Fred Drakeba096332000-07-09 07:04:36 +0000352string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000353{
354 return a->ob_size;
355}
356
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000357static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000358string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000359{
360 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000361 register PyStringObject *op;
362 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000363 if (PyUnicode_Check(bb))
364 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000365 PyErr_Format(PyExc_TypeError,
366 "cannot add type \"%.200s\" to string",
367 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 return NULL;
369 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000370#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371 /* Optimize cases with empty left or right operand */
372 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000373 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000374 return bb;
375 }
376 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 Py_INCREF(a);
378 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 }
380 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000381 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000382 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000383 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000384 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000385 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000386 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000387#ifdef CACHE_HASH
388 op->ob_shash = -1;
389#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000390#ifdef INTERN_STRINGS
391 op->ob_sinterned = NULL;
392#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000393 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
394 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
395 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000396 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397#undef b
398}
399
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000400static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000401string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402{
403 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000404 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000405 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406 if (n < 0)
407 n = 0;
408 size = a->ob_size * n;
409 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000410 Py_INCREF(a);
411 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000413 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000414 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000415 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000416 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000418 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000419#ifdef CACHE_HASH
420 op->ob_shash = -1;
421#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000422#ifdef INTERN_STRINGS
423 op->ob_sinterned = NULL;
424#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000425 for (i = 0; i < size; i += a->ob_size)
426 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
427 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000429}
430
431/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
432
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000434string_slice(register PyStringObject *a, register int i, register int j)
435 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436{
437 if (i < 0)
438 i = 0;
439 if (j < 0)
440 j = 0; /* Avoid signed/unsigned bug in next line */
441 if (j > a->ob_size)
442 j = a->ob_size;
443 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000444 Py_INCREF(a);
445 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000446 }
447 if (j < i)
448 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000449 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450}
451
Guido van Rossum9284a572000-03-07 15:53:43 +0000452static int
Fred Drakeba096332000-07-09 07:04:36 +0000453string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000454{
455 register char *s, *end;
456 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000457 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000458 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000459 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000460 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000461 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000462 return -1;
463 }
464 c = PyString_AsString(el)[0];
465 s = PyString_AsString(a);
466 end = s + PyString_Size(a);
467 while (s < end) {
468 if (c == *s++)
469 return 1;
470 }
471 return 0;
472}
473
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000475string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000476{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000477 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000478 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000479 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000480 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 return NULL;
482 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000483 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000484 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000485#ifdef COUNT_ALLOCS
486 if (v != NULL)
487 one_strings++;
488#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000489 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000491 if (v == NULL)
492 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000493 characters[c] = (PyStringObject *) v;
494 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000495 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000496 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000497 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000498}
499
500static int
Fred Drakeba096332000-07-09 07:04:36 +0000501string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000502{
Guido van Rossum253919f1991-02-13 23:18:39 +0000503 int len_a = a->ob_size, len_b = b->ob_size;
504 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000505 int cmp;
506 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000507 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000508 if (cmp == 0)
509 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
510 if (cmp != 0)
511 return cmp;
512 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000513 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
Guido van Rossum9bfef441993-03-29 10:43:31 +0000516static long
Fred Drakeba096332000-07-09 07:04:36 +0000517string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000518{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000519 register int len;
520 register unsigned char *p;
521 register long x;
522
523#ifdef CACHE_HASH
524 if (a->ob_shash != -1)
525 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000526#ifdef INTERN_STRINGS
527 if (a->ob_sinterned != NULL)
528 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000529 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000530#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000531#endif
532 len = a->ob_size;
533 p = (unsigned char *) a->ob_sval;
534 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000535 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000536 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000537 x ^= a->ob_size;
538 if (x == -1)
539 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000540#ifdef CACHE_HASH
541 a->ob_shash = x;
542#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000543 return x;
544}
545
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000546static int
Fred Drakeba096332000-07-09 07:04:36 +0000547string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000548{
549 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000550 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000551 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000552 return -1;
553 }
554 *ptr = (void *)self->ob_sval;
555 return self->ob_size;
556}
557
558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000560{
Guido van Rossum045e6881997-09-08 18:30:11 +0000561 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000562 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000563 return -1;
564}
565
566static int
Fred Drakeba096332000-07-09 07:04:36 +0000567string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000568{
569 if ( lenp )
570 *lenp = self->ob_size;
571 return 1;
572}
573
Guido van Rossum1db70701998-10-08 02:18:52 +0000574static int
Fred Drakeba096332000-07-09 07:04:36 +0000575string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000576{
577 if ( index != 0 ) {
578 PyErr_SetString(PyExc_SystemError,
579 "accessing non-existent string segment");
580 return -1;
581 }
582 *ptr = self->ob_sval;
583 return self->ob_size;
584}
585
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000587 (inquiry)string_length, /*sq_length*/
588 (binaryfunc)string_concat, /*sq_concat*/
589 (intargfunc)string_repeat, /*sq_repeat*/
590 (intargfunc)string_item, /*sq_item*/
591 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000592 0, /*sq_ass_item*/
593 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000594 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000595};
596
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000597static PyBufferProcs string_as_buffer = {
598 (getreadbufferproc)string_buffer_getreadbuf,
599 (getwritebufferproc)string_buffer_getwritebuf,
600 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000601 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000602};
603
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000604
605
/* Strip-mode selectors.  NOTE(review): presumably consumed by the
   strip helpers defined further down the file -- not visible here. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2
609
610
611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000612split_whitespace(char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000613{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000614 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000615 PyObject* item;
616 PyObject *list = PyList_New(0);
617
618 if (list == NULL)
619 return NULL;
620
Guido van Rossum4c08d552000-03-10 22:55:18 +0000621 for (i = j = 0; i < len; ) {
622 while (i < len && isspace(Py_CHARMASK(s[i])))
623 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000624 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 while (i < len && !isspace(Py_CHARMASK(s[i])))
626 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000627 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000628 if (maxsplit-- <= 0)
629 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 item = PyString_FromStringAndSize(s+j, (int)(i-j));
631 if (item == NULL)
632 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000633 err = PyList_Append(list, item);
634 Py_DECREF(item);
635 if (err < 0)
636 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000637 while (i < len && isspace(Py_CHARMASK(s[i])))
638 i++;
639 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000640 }
641 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000642 if (j < len) {
643 item = PyString_FromStringAndSize(s+j, (int)(len - j));
644 if (item == NULL)
645 goto finally;
646 err = PyList_Append(list, item);
647 Py_DECREF(item);
648 if (err < 0)
649 goto finally;
650 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000651 return list;
652 finally:
653 Py_DECREF(list);
654 return NULL;
655}
656
657
/* Documentation text describing S.split(). */
658static char split__doc__[] =
659"S.split([sep [,maxsplit]]) -> list of strings\n\
660\n\
661Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000662delimiter string. If maxsplit is given, at most maxsplit\n\
663splits are done. If sep is not specified, any whitespace string\n\
664is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665
666static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000667string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000668{
669 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000670 int maxsplit = -1;
671 const char *s = PyString_AS_STRING(self), *sub;
672 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000673
Guido van Rossum4c08d552000-03-10 22:55:18 +0000674 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000675 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000676 if (maxsplit < 0)
677 maxsplit = INT_MAX;
678 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000679 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000680 if (PyString_Check(subobj)) {
681 sub = PyString_AS_STRING(subobj);
682 n = PyString_GET_SIZE(subobj);
683 }
684 else if (PyUnicode_Check(subobj))
685 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
686 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
687 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688 if (n == 0) {
689 PyErr_SetString(PyExc_ValueError, "empty separator");
690 return NULL;
691 }
692
693 list = PyList_New(0);
694 if (list == NULL)
695 return NULL;
696
697 i = j = 0;
698 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000699 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (maxsplit-- <= 0)
701 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000702 item = PyString_FromStringAndSize(s+j, (int)(i-j));
703 if (item == NULL)
704 goto fail;
705 err = PyList_Append(list, item);
706 Py_DECREF(item);
707 if (err < 0)
708 goto fail;
709 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000710 }
711 else
712 i++;
713 }
714 item = PyString_FromStringAndSize(s+j, (int)(len-j));
715 if (item == NULL)
716 goto fail;
717 err = PyList_Append(list, item);
718 Py_DECREF(item);
719 if (err < 0)
720 goto fail;
721
722 return list;
723
724 fail:
725 Py_DECREF(list);
726 return NULL;
727}
728
729
730static char join__doc__[] =
731"S.join(sequence) -> string\n\
732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000733Return a string which is the concatenation of the strings in the\n\
734sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000735
736static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000737string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000738{
739 char *sep = PyString_AS_STRING(self);
740 int seplen = PyString_GET_SIZE(self);
741 PyObject *res = NULL;
742 int reslen = 0;
743 char *p;
744 int seqlen = 0;
745 int sz = 100;
746 int i, slen;
747 PyObject *seq;
748
Guido van Rossum43713e52000-02-29 13:59:29 +0000749 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000750 return NULL;
751
752 seqlen = PySequence_Length(seq);
753 if (seqlen < 0 && PyErr_Occurred())
754 return NULL;
755
756 if (seqlen == 1) {
757 /* Optimization if there's only one item */
758 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000759 if (item == NULL)
760 return NULL;
761 if (!PyString_Check(item) &&
762 !PyUnicode_Check(item)) {
763 PyErr_SetString(PyExc_TypeError,
764 "first argument must be sequence of strings");
765 Py_DECREF(item);
766 return NULL;
767 }
768 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000769 }
770 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
771 return NULL;
772 p = PyString_AsString(res);
773
774 /* optimize for lists. all others (tuples and arbitrary sequences)
775 * just use the abstract interface.
776 */
777 if (PyList_Check(seq)) {
778 for (i = 0; i < seqlen; i++) {
779 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000780 if (!PyString_Check(item)){
781 if (PyUnicode_Check(item)) {
782 Py_DECREF(res);
783 return PyUnicode_Join(
784 (PyObject *)self,
785 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000786 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000787 PyErr_Format(PyExc_TypeError,
788 "sequence item %i not a string",
789 i);
790 goto finally;
791 }
792 slen = PyString_GET_SIZE(item);
793 while (reslen + slen + seplen >= sz) {
794 if (_PyString_Resize(&res, sz*2))
795 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000796 sz *= 2;
797 p = PyString_AsString(res) + reslen;
798 }
799 if (i > 0) {
800 memcpy(p, sep, seplen);
801 p += seplen;
802 reslen += seplen;
803 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000804 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000805 p += slen;
806 reslen += slen;
807 }
808 }
809 else {
810 for (i = 0; i < seqlen; i++) {
811 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000812 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000813 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000814 if (!PyString_Check(item)){
815 if (PyUnicode_Check(item)) {
816 Py_DECREF(res);
817 Py_DECREF(item);
818 return PyUnicode_Join(
819 (PyObject *)self,
820 seq);
821 }
822 Py_DECREF(item);
823 PyErr_Format(PyExc_TypeError,
824 "sequence item %i not a string",
825 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000826 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000827 }
828 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000829 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000830 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000831 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000832 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000833 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000834 sz *= 2;
835 p = PyString_AsString(res) + reslen;
836 }
837 if (i > 0) {
838 memcpy(p, sep, seplen);
839 p += seplen;
840 reslen += seplen;
841 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000842 memcpy(p, PyString_AS_STRING(item), slen);
843 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000844 p += slen;
845 reslen += slen;
846 }
847 }
848 if (_PyString_Resize(&res, reslen))
849 goto finally;
850 return res;
851
852 finally:
853 Py_DECREF(res);
854 return NULL;
855}
856
857
858
859static long
Fred Drakeba096332000-07-09 07:04:36 +0000860string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000861{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000862 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000863 int len = PyString_GET_SIZE(self);
864 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000865 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000866
Guido van Rossumc6821402000-05-08 14:08:05 +0000867 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
868 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000869 return -2;
870 if (PyString_Check(subobj)) {
871 sub = PyString_AS_STRING(subobj);
872 n = PyString_GET_SIZE(subobj);
873 }
874 else if (PyUnicode_Check(subobj))
875 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
876 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000877 return -2;
878
879 if (last > len)
880 last = len;
881 if (last < 0)
882 last += len;
883 if (last < 0)
884 last = 0;
885 if (i < 0)
886 i += len;
887 if (i < 0)
888 i = 0;
889
Guido van Rossum4c08d552000-03-10 22:55:18 +0000890 if (dir > 0) {
891 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000892 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000893 last -= n;
894 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000895 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000896 return (long)i;
897 }
898 else {
899 int j;
900
901 if (n == 0 && i <= last)
902 return (long)last;
903 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000904 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000905 return (long)j;
906 }
907
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000908 return -1;
909}
910
911
912static char find__doc__[] =
913"S.find(sub [,start [,end]]) -> int\n\
914\n\
915Return the lowest index in S where substring sub is found,\n\
916such that sub is contained within s[start,end]. Optional\n\
917arguments start and end are interpreted as in slice notation.\n\
918\n\
919Return -1 on failure.";
920
921static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000922string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000923{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000924 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000925 if (result == -2)
926 return NULL;
927 return PyInt_FromLong(result);
928}
929
930
931static char index__doc__[] =
932"S.index(sub [,start [,end]]) -> int\n\
933\n\
934Like S.find() but raise ValueError when the substring is not found.";
935
936static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000937string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000938{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000939 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000940 if (result == -2)
941 return NULL;
942 if (result == -1) {
943 PyErr_SetString(PyExc_ValueError,
944 "substring not found in string.index");
945 return NULL;
946 }
947 return PyInt_FromLong(result);
948}
949
950
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000951static char rfind__doc__[] =
952"S.rfind(sub [,start [,end]]) -> int\n\
953\n\
954Return the highest index in S where substring sub is found,\n\
955such that sub is contained within s[start,end]. Optional\n\
956arguments start and end are interpreted as in slice notation.\n\
957\n\
958Return -1 on failure.";
959
960static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000961string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000962{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000963 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000964 if (result == -2)
965 return NULL;
966 return PyInt_FromLong(result);
967}
968
969
970static char rindex__doc__[] =
971"S.rindex(sub [,start [,end]]) -> int\n\
972\n\
973Like S.rfind() but raise ValueError when the substring is not found.";
974
975static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000976string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000977{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000978 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000979 if (result == -2)
980 return NULL;
981 if (result == -1) {
982 PyErr_SetString(PyExc_ValueError,
983 "substring not found in string.rindex");
984 return NULL;
985 }
986 return PyInt_FromLong(result);
987}
988
989
990static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000991do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000992{
993 char *s = PyString_AS_STRING(self);
994 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000995
Guido van Rossum43713e52000-02-29 13:59:29 +0000996 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000997 return NULL;
998
999 i = 0;
1000 if (striptype != RIGHTSTRIP) {
1001 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1002 i++;
1003 }
1004 }
1005
1006 j = len;
1007 if (striptype != LEFTSTRIP) {
1008 do {
1009 j--;
1010 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1011 j++;
1012 }
1013
1014 if (i == 0 && j == len) {
1015 Py_INCREF(self);
1016 return (PyObject*)self;
1017 }
1018 else
1019 return PyString_FromStringAndSize(s+i, j-i);
1020}
1021
1022
1023static char strip__doc__[] =
1024"S.strip() -> string\n\
1025\n\
1026Return a copy of the string S with leading and trailing\n\
1027whitespace removed.";
1028
1029static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001030string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001031{
1032 return do_strip(self, args, BOTHSTRIP);
1033}
1034
1035
1036static char lstrip__doc__[] =
1037"S.lstrip() -> string\n\
1038\n\
1039Return a copy of the string S with leading whitespace removed.";
1040
1041static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001042string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001043{
1044 return do_strip(self, args, LEFTSTRIP);
1045}
1046
1047
1048static char rstrip__doc__[] =
1049"S.rstrip() -> string\n\
1050\n\
1051Return a copy of the string S with trailing whitespace removed.";
1052
1053static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001054string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055{
1056 return do_strip(self, args, RIGHTSTRIP);
1057}
1058
1059
1060static char lower__doc__[] =
1061"S.lower() -> string\n\
1062\n\
1063Return a copy of the string S converted to lowercase.";
1064
1065static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001066string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001067{
1068 char *s = PyString_AS_STRING(self), *s_new;
1069 int i, n = PyString_GET_SIZE(self);
1070 PyObject *new;
1071
Guido van Rossum43713e52000-02-29 13:59:29 +00001072 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001073 return NULL;
1074 new = PyString_FromStringAndSize(NULL, n);
1075 if (new == NULL)
1076 return NULL;
1077 s_new = PyString_AsString(new);
1078 for (i = 0; i < n; i++) {
1079 int c = Py_CHARMASK(*s++);
1080 if (isupper(c)) {
1081 *s_new = tolower(c);
1082 } else
1083 *s_new = c;
1084 s_new++;
1085 }
1086 return new;
1087}
1088
1089
1090static char upper__doc__[] =
1091"S.upper() -> string\n\
1092\n\
1093Return a copy of the string S converted to uppercase.";
1094
1095static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001096string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001097{
1098 char *s = PyString_AS_STRING(self), *s_new;
1099 int i, n = PyString_GET_SIZE(self);
1100 PyObject *new;
1101
Guido van Rossum43713e52000-02-29 13:59:29 +00001102 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001103 return NULL;
1104 new = PyString_FromStringAndSize(NULL, n);
1105 if (new == NULL)
1106 return NULL;
1107 s_new = PyString_AsString(new);
1108 for (i = 0; i < n; i++) {
1109 int c = Py_CHARMASK(*s++);
1110 if (islower(c)) {
1111 *s_new = toupper(c);
1112 } else
1113 *s_new = c;
1114 s_new++;
1115 }
1116 return new;
1117}
1118
1119
Guido van Rossum4c08d552000-03-10 22:55:18 +00001120static char title__doc__[] =
1121"S.title() -> string\n\
1122\n\
1123Return a titlecased version of S, i.e. words start with uppercase\n\
1124characters, all remaining cased characters have lowercase.";
1125
1126static PyObject*
1127string_title(PyUnicodeObject *self, PyObject *args)
1128{
1129 char *s = PyString_AS_STRING(self), *s_new;
1130 int i, n = PyString_GET_SIZE(self);
1131 int previous_is_cased = 0;
1132 PyObject *new;
1133
1134 if (!PyArg_ParseTuple(args, ":title"))
1135 return NULL;
1136 new = PyString_FromStringAndSize(NULL, n);
1137 if (new == NULL)
1138 return NULL;
1139 s_new = PyString_AsString(new);
1140 for (i = 0; i < n; i++) {
1141 int c = Py_CHARMASK(*s++);
1142 if (islower(c)) {
1143 if (!previous_is_cased)
1144 c = toupper(c);
1145 previous_is_cased = 1;
1146 } else if (isupper(c)) {
1147 if (previous_is_cased)
1148 c = tolower(c);
1149 previous_is_cased = 1;
1150 } else
1151 previous_is_cased = 0;
1152 *s_new++ = c;
1153 }
1154 return new;
1155}
1156
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001157static char capitalize__doc__[] =
1158"S.capitalize() -> string\n\
1159\n\
1160Return a copy of the string S with only its first character\n\
1161capitalized.";
1162
1163static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001164string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165{
1166 char *s = PyString_AS_STRING(self), *s_new;
1167 int i, n = PyString_GET_SIZE(self);
1168 PyObject *new;
1169
Guido van Rossum43713e52000-02-29 13:59:29 +00001170 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001171 return NULL;
1172 new = PyString_FromStringAndSize(NULL, n);
1173 if (new == NULL)
1174 return NULL;
1175 s_new = PyString_AsString(new);
1176 if (0 < n) {
1177 int c = Py_CHARMASK(*s++);
1178 if (islower(c))
1179 *s_new = toupper(c);
1180 else
1181 *s_new = c;
1182 s_new++;
1183 }
1184 for (i = 1; i < n; i++) {
1185 int c = Py_CHARMASK(*s++);
1186 if (isupper(c))
1187 *s_new = tolower(c);
1188 else
1189 *s_new = c;
1190 s_new++;
1191 }
1192 return new;
1193}
1194
1195
1196static char count__doc__[] =
1197"S.count(sub[, start[, end]]) -> int\n\
1198\n\
1199Return the number of occurrences of substring sub in string\n\
1200S[start:end]. Optional arguments start and end are\n\
1201interpreted as in slice notation.";
1202
1203static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001204string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001205{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001206 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001207 int len = PyString_GET_SIZE(self), n;
1208 int i = 0, last = INT_MAX;
1209 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001210 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001211
Guido van Rossumc6821402000-05-08 14:08:05 +00001212 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1213 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001214 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001215
Guido van Rossum4c08d552000-03-10 22:55:18 +00001216 if (PyString_Check(subobj)) {
1217 sub = PyString_AS_STRING(subobj);
1218 n = PyString_GET_SIZE(subobj);
1219 }
1220 else if (PyUnicode_Check(subobj))
1221 return PyInt_FromLong(
1222 PyUnicode_Count((PyObject *)self, subobj, i, last));
1223 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1224 return NULL;
1225
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001226 if (last > len)
1227 last = len;
1228 if (last < 0)
1229 last += len;
1230 if (last < 0)
1231 last = 0;
1232 if (i < 0)
1233 i += len;
1234 if (i < 0)
1235 i = 0;
1236 m = last + 1 - n;
1237 if (n == 0)
1238 return PyInt_FromLong((long) (m-i));
1239
1240 r = 0;
1241 while (i < m) {
1242 if (!memcmp(s+i, sub, n)) {
1243 r++;
1244 i += n;
1245 } else {
1246 i++;
1247 }
1248 }
1249 return PyInt_FromLong((long) r);
1250}
1251
1252
1253static char swapcase__doc__[] =
1254"S.swapcase() -> string\n\
1255\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001256Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001257converted to lowercase and vice versa.";
1258
1259static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001260string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001261{
1262 char *s = PyString_AS_STRING(self), *s_new;
1263 int i, n = PyString_GET_SIZE(self);
1264 PyObject *new;
1265
Guido van Rossum43713e52000-02-29 13:59:29 +00001266 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001267 return NULL;
1268 new = PyString_FromStringAndSize(NULL, n);
1269 if (new == NULL)
1270 return NULL;
1271 s_new = PyString_AsString(new);
1272 for (i = 0; i < n; i++) {
1273 int c = Py_CHARMASK(*s++);
1274 if (islower(c)) {
1275 *s_new = toupper(c);
1276 }
1277 else if (isupper(c)) {
1278 *s_new = tolower(c);
1279 }
1280 else
1281 *s_new = c;
1282 s_new++;
1283 }
1284 return new;
1285}
1286
1287
1288static char translate__doc__[] =
1289"S.translate(table [,deletechars]) -> string\n\
1290\n\
1291Return a copy of the string S, where all characters occurring\n\
1292in the optional argument deletechars are removed, and the\n\
1293remaining characters have been mapped through the given\n\
1294translation table, which must be a string of length 256.";
1295
1296static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001297string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001298{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001299 register char *input, *output;
1300 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001301 register int i, c, changed = 0;
1302 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001303 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304 int inlen, tablen, dellen = 0;
1305 PyObject *result;
1306 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001308
Guido van Rossum4c08d552000-03-10 22:55:18 +00001309 if (!PyArg_ParseTuple(args, "O|O:translate",
1310 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001312
1313 if (PyString_Check(tableobj)) {
1314 table1 = PyString_AS_STRING(tableobj);
1315 tablen = PyString_GET_SIZE(tableobj);
1316 }
1317 else if (PyUnicode_Check(tableobj)) {
1318 /* Unicode .translate() does not support the deletechars
1319 parameter; instead a mapping to None will cause characters
1320 to be deleted. */
1321 if (delobj != NULL) {
1322 PyErr_SetString(PyExc_TypeError,
1323 "deletions are implemented differently for unicode");
1324 return NULL;
1325 }
1326 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1327 }
1328 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001329 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330
1331 if (delobj != NULL) {
1332 if (PyString_Check(delobj)) {
1333 del_table = PyString_AS_STRING(delobj);
1334 dellen = PyString_GET_SIZE(delobj);
1335 }
1336 else if (PyUnicode_Check(delobj)) {
1337 PyErr_SetString(PyExc_TypeError,
1338 "deletions are implemented differently for unicode");
1339 return NULL;
1340 }
1341 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1342 return NULL;
1343
1344 if (tablen != 256) {
1345 PyErr_SetString(PyExc_ValueError,
1346 "translation table must be 256 characters long");
1347 return NULL;
1348 }
1349 }
1350 else {
1351 del_table = NULL;
1352 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001353 }
1354
1355 table = table1;
1356 inlen = PyString_Size(input_obj);
1357 result = PyString_FromStringAndSize((char *)NULL, inlen);
1358 if (result == NULL)
1359 return NULL;
1360 output_start = output = PyString_AsString(result);
1361 input = PyString_AsString(input_obj);
1362
1363 if (dellen == 0) {
1364 /* If no deletions are required, use faster code */
1365 for (i = inlen; --i >= 0; ) {
1366 c = Py_CHARMASK(*input++);
1367 if (Py_CHARMASK((*output++ = table[c])) != c)
1368 changed = 1;
1369 }
1370 if (changed)
1371 return result;
1372 Py_DECREF(result);
1373 Py_INCREF(input_obj);
1374 return input_obj;
1375 }
1376
1377 for (i = 0; i < 256; i++)
1378 trans_table[i] = Py_CHARMASK(table[i]);
1379
1380 for (i = 0; i < dellen; i++)
1381 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1382
1383 for (i = inlen; --i >= 0; ) {
1384 c = Py_CHARMASK(*input++);
1385 if (trans_table[c] != -1)
1386 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1387 continue;
1388 changed = 1;
1389 }
1390 if (!changed) {
1391 Py_DECREF(result);
1392 Py_INCREF(input_obj);
1393 return input_obj;
1394 }
1395 /* Fix the size of the resulting string */
1396 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1397 return NULL;
1398 return result;
1399}
1400
1401
1402/* What follows is used for implementing replace(). Perry Stoll. */
1403
/*
  mymemfind

  strstr() work-alike for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of the match within MEM, or -1 if
  there is none.  If PAT is longer than MEM the result is -1.
*/
static int
mymemfind(char *mem, int len, char *pat, int pat_len)
{
	/* a match cannot begin within the last pat_len-1 bytes, so this is
	   the last offset worth trying */
	int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1429
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  left to right and resuming after the end of each match:
       mem=1111 and pat==11 returns 2.
       mem=11111 and pat==11 also return 2.
 */
static int
mymemcnt(char *mem, int len, char *pat, int pat_len)
{
	int nfound;

	for (nfound = 0; len >= 0; nfound++) {
		int skip;
		int hit = mymemfind(mem, len, pat, pat_len);

		if (hit == -1)
			break;
		/* continue right after this match */
		skip = hit + pat_len;
		mem += skip;
		len -= skip;
	}
	return nfound;
}
1453
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.

   str, len       input string and its length
   pat, pat_len   pattern to search for; callers must pass pat_len > 0
   sub, sub_len   replacement text
   count          maximum number of replacements; negative means all
   out_len        receives the result length (see below)

   If STR is empty, PAT is longer than STR, or no replacement is to be
   made, the original string is returned.  Otherwise, a new string is
   allocated here with PyMem_MALLOC and returned; the caller releases it
   with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or a result too
       large to be described by an int).
*/
static char *
mymemreplace(char *str, int len,		/* input string */
             char *pat, int pat_len,		/* pattern string to find */
             char *sub, int sub_len,		/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* The result can only grow when SUB is longer than PAT; refuse
	   sizes that would overflow an int. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* new_len is 0 when every byte is replaced by an empty SUB.
	   malloc(0) may legitimately return NULL, which the caller would
	   take for an out-of-memory error, so always ask for one byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1531
1532
1533static char replace__doc__[] =
1534"S.replace (old, new[, maxsplit]) -> string\n\
1535\n\
1536Return a copy of string S with all occurrences of substring\n\
1537old replaced by new. If the optional argument maxsplit is\n\
1538given, only the first maxsplit occurrences are replaced.";
1539
1540static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001541string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001543 const char *str = PyString_AS_STRING(self), *sub, *repl;
1544 char *new_s;
1545 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1546 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001548 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001549
Guido van Rossum4c08d552000-03-10 22:55:18 +00001550 if (!PyArg_ParseTuple(args, "OO|i:replace",
1551 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001553
1554 if (PyString_Check(subobj)) {
1555 sub = PyString_AS_STRING(subobj);
1556 sub_len = PyString_GET_SIZE(subobj);
1557 }
1558 else if (PyUnicode_Check(subobj))
1559 return PyUnicode_Replace((PyObject *)self,
1560 subobj, replobj, count);
1561 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1562 return NULL;
1563
1564 if (PyString_Check(replobj)) {
1565 repl = PyString_AS_STRING(replobj);
1566 repl_len = PyString_GET_SIZE(replobj);
1567 }
1568 else if (PyUnicode_Check(replobj))
1569 return PyUnicode_Replace((PyObject *)self,
1570 subobj, replobj, count);
1571 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1572 return NULL;
1573
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001574 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001575 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576 return NULL;
1577 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001578 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579 if (new_s == NULL) {
1580 PyErr_NoMemory();
1581 return NULL;
1582 }
1583 if (out_len == -1) {
1584 /* we're returning another reference to self */
1585 new = (PyObject*)self;
1586 Py_INCREF(new);
1587 }
1588 else {
1589 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001590 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591 }
1592 return new;
1593}
1594
1595
1596static char startswith__doc__[] =
1597"S.startswith(prefix[, start[, end]]) -> int\n\
1598\n\
1599Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1600optional start, test S beginning at that position. With optional end, stop\n\
1601comparing S at that position.";
1602
1603static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001604string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001605{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001608 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609 int plen;
1610 int start = 0;
1611 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001612 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613
Guido van Rossumc6821402000-05-08 14:08:05 +00001614 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1615 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 return NULL;
1617 if (PyString_Check(subobj)) {
1618 prefix = PyString_AS_STRING(subobj);
1619 plen = PyString_GET_SIZE(subobj);
1620 }
1621 else if (PyUnicode_Check(subobj))
1622 return PyInt_FromLong(
1623 PyUnicode_Tailmatch((PyObject *)self,
1624 subobj, start, end, -1));
1625 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 return NULL;
1627
1628 /* adopt Java semantics for index out of range. it is legal for
1629 * offset to be == plen, but this only returns true if prefix is
1630 * the empty string.
1631 */
1632 if (start < 0 || start+plen > len)
1633 return PyInt_FromLong(0);
1634
1635 if (!memcmp(str+start, prefix, plen)) {
1636 /* did the match end after the specified end? */
1637 if (end < 0)
1638 return PyInt_FromLong(1);
1639 else if (end - start < plen)
1640 return PyInt_FromLong(0);
1641 else
1642 return PyInt_FromLong(1);
1643 }
1644 else return PyInt_FromLong(0);
1645}
1646
1647
1648static char endswith__doc__[] =
1649"S.endswith(suffix[, start[, end]]) -> int\n\
1650\n\
1651Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1652optional start, test S beginning at that position. With optional end, stop\n\
1653comparing S at that position.";
1654
1655static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001656string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001657{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001658 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001659 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001660 const char* suffix;
1661 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662 int start = 0;
1663 int end = -1;
1664 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001666
Guido van Rossumc6821402000-05-08 14:08:05 +00001667 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1668 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001669 return NULL;
1670 if (PyString_Check(subobj)) {
1671 suffix = PyString_AS_STRING(subobj);
1672 slen = PyString_GET_SIZE(subobj);
1673 }
1674 else if (PyUnicode_Check(subobj))
1675 return PyInt_FromLong(
1676 PyUnicode_Tailmatch((PyObject *)self,
1677 subobj, start, end, +1));
1678 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001679 return NULL;
1680
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682 return PyInt_FromLong(0);
1683
1684 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001685 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001686
Guido van Rossum4c08d552000-03-10 22:55:18 +00001687 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688 return PyInt_FromLong(1);
1689 else return PyInt_FromLong(0);
1690}
1691
1692
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001693static char encode__doc__[] =
1694"S.encode([encoding[,errors]]) -> string\n\
1695\n\
1696Return an encoded string version of S. Default encoding is the current\n\
1697default string encoding. errors may be given to set a different error\n\
1698handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1699a ValueError. Other possible values are 'ignore' and 'replace'.";
1700
1701static PyObject *
1702string_encode(PyStringObject *self, PyObject *args)
1703{
1704 char *encoding = NULL;
1705 char *errors = NULL;
1706 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1707 return NULL;
1708 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1709}
1710
1711
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712static char expandtabs__doc__[] =
1713"S.expandtabs([tabsize]) -> string\n\
1714\n\
1715Return a copy of S where all tab characters are expanded using spaces.\n\
1716If tabsize is not given, a tab size of 8 characters is assumed.";
1717
1718static PyObject*
1719string_expandtabs(PyStringObject *self, PyObject *args)
1720{
1721 const char *e, *p;
1722 char *q;
1723 int i, j;
1724 PyObject *u;
1725 int tabsize = 8;
1726
1727 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1728 return NULL;
1729
1730 /* First pass: determine size of ouput string */
1731 i = j = 0;
1732 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1733 for (p = PyString_AS_STRING(self); p < e; p++)
1734 if (*p == '\t') {
1735 if (tabsize > 0)
1736 j += tabsize - (j % tabsize);
1737 }
1738 else {
1739 j++;
1740 if (*p == '\n' || *p == '\r') {
1741 i += j;
1742 j = 0;
1743 }
1744 }
1745
1746 /* Second pass: create output string and fill it */
1747 u = PyString_FromStringAndSize(NULL, i + j);
1748 if (!u)
1749 return NULL;
1750
1751 j = 0;
1752 q = PyString_AS_STRING(u);
1753
1754 for (p = PyString_AS_STRING(self); p < e; p++)
1755 if (*p == '\t') {
1756 if (tabsize > 0) {
1757 i = tabsize - (j % tabsize);
1758 j += i;
1759 while (i--)
1760 *q++ = ' ';
1761 }
1762 }
1763 else {
1764 j++;
1765 *q++ = *p;
1766 if (*p == '\n' || *p == '\r')
1767 j = 0;
1768 }
1769
1770 return u;
1771}
1772
1773static
1774PyObject *pad(PyStringObject *self,
1775 int left,
1776 int right,
1777 char fill)
1778{
1779 PyObject *u;
1780
1781 if (left < 0)
1782 left = 0;
1783 if (right < 0)
1784 right = 0;
1785
1786 if (left == 0 && right == 0) {
1787 Py_INCREF(self);
1788 return (PyObject *)self;
1789 }
1790
1791 u = PyString_FromStringAndSize(NULL,
1792 left + PyString_GET_SIZE(self) + right);
1793 if (u) {
1794 if (left)
1795 memset(PyString_AS_STRING(u), fill, left);
1796 memcpy(PyString_AS_STRING(u) + left,
1797 PyString_AS_STRING(self),
1798 PyString_GET_SIZE(self));
1799 if (right)
1800 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1801 fill, right);
1802 }
1803
1804 return u;
1805}
1806
1807static char ljust__doc__[] =
1808"S.ljust(width) -> string\n\
1809\n\
1810Return S left justified in a string of length width. Padding is\n\
1811done using spaces.";
1812
1813static PyObject *
1814string_ljust(PyStringObject *self, PyObject *args)
1815{
1816 int width;
1817 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1818 return NULL;
1819
1820 if (PyString_GET_SIZE(self) >= width) {
1821 Py_INCREF(self);
1822 return (PyObject*) self;
1823 }
1824
1825 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1826}
1827
1828
1829static char rjust__doc__[] =
1830"S.rjust(width) -> string\n\
1831\n\
1832Return S right justified in a string of length width. Padding is\n\
1833done using spaces.";
1834
1835static PyObject *
1836string_rjust(PyStringObject *self, PyObject *args)
1837{
1838 int width;
1839 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1840 return NULL;
1841
1842 if (PyString_GET_SIZE(self) >= width) {
1843 Py_INCREF(self);
1844 return (PyObject*) self;
1845 }
1846
1847 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1848}
1849
1850
1851static char center__doc__[] =
1852"S.center(width) -> string\n\
1853\n\
1854Return S centered in a string of length width. Padding is done\n\
1855using spaces.";
1856
1857static PyObject *
1858string_center(PyStringObject *self, PyObject *args)
1859{
1860 int marg, left;
1861 int width;
1862
1863 if (!PyArg_ParseTuple(args, "i:center", &width))
1864 return NULL;
1865
1866 if (PyString_GET_SIZE(self) >= width) {
1867 Py_INCREF(self);
1868 return (PyObject*) self;
1869 }
1870
1871 marg = width - PyString_GET_SIZE(self);
1872 left = marg / 2 + (marg & width & 1);
1873
1874 return pad(self, left, marg - left, ' ');
1875}
1876
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).
 * Left-pads with '0' via pad(); if the original text started with a
 * sign character, the sign is moved in front of the zeros.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int zeros;
    char *text;
    PyObject *padded;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject *)self;
    }

    zeros = width - PyString_GET_SIZE(self);

    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* text[zeros] is the first character of the original string. */
    text = PyString_AS_STRING(padded);
    switch (text[zeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        text[0] = text[zeros];
        text[zeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1916
1917static char isspace__doc__[] =
1918"S.isspace() -> int\n\
1919\n\
1920Return 1 if there are only whitespace characters in S,\n\
19210 otherwise.";
1922
1923static PyObject*
1924string_isspace(PyStringObject *self, PyObject *args)
1925{
Fred Drakeba096332000-07-09 07:04:36 +00001926 register const unsigned char *p
1927 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001928 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001929
1930 if (!PyArg_NoArgs(args))
1931 return NULL;
1932
1933 /* Shortcut for single character strings */
1934 if (PyString_GET_SIZE(self) == 1 &&
1935 isspace(*p))
1936 return PyInt_FromLong(1);
1937
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001938 /* Special case for empty strings */
1939 if (PyString_GET_SIZE(self) == 0)
1940 return PyInt_FromLong(0);
1941
Guido van Rossum4c08d552000-03-10 22:55:18 +00001942 e = p + PyString_GET_SIZE(self);
1943 for (; p < e; p++) {
1944 if (!isspace(*p))
1945 return PyInt_FromLong(0);
1946 }
1947 return PyInt_FromLong(1);
1948}
1949
1950
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001951static char isalpha__doc__[] =
1952"S.isalpha() -> int\n\
1953\n\
1954Return 1 if all characters in S are alphabetic\n\
1955and there is at least one character in S, 0 otherwise.";
1956
1957static PyObject*
1958string_isalpha(PyUnicodeObject *self, PyObject *args)
1959{
Fred Drakeba096332000-07-09 07:04:36 +00001960 register const unsigned char *p
1961 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001962 register const unsigned char *e;
1963
1964 if (!PyArg_NoArgs(args))
1965 return NULL;
1966
1967 /* Shortcut for single character strings */
1968 if (PyString_GET_SIZE(self) == 1 &&
1969 isalpha(*p))
1970 return PyInt_FromLong(1);
1971
1972 /* Special case for empty strings */
1973 if (PyString_GET_SIZE(self) == 0)
1974 return PyInt_FromLong(0);
1975
1976 e = p + PyString_GET_SIZE(self);
1977 for (; p < e; p++) {
1978 if (!isalpha(*p))
1979 return PyInt_FromLong(0);
1980 }
1981 return PyInt_FromLong(1);
1982}
1983
1984
1985static char isalnum__doc__[] =
1986"S.isalnum() -> int\n\
1987\n\
1988Return 1 if all characters in S are alphanumeric\n\
1989and there is at least one character in S, 0 otherwise.";
1990
1991static PyObject*
1992string_isalnum(PyUnicodeObject *self, PyObject *args)
1993{
Fred Drakeba096332000-07-09 07:04:36 +00001994 register const unsigned char *p
1995 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001996 register const unsigned char *e;
1997
1998 if (!PyArg_NoArgs(args))
1999 return NULL;
2000
2001 /* Shortcut for single character strings */
2002 if (PyString_GET_SIZE(self) == 1 &&
2003 isalnum(*p))
2004 return PyInt_FromLong(1);
2005
2006 /* Special case for empty strings */
2007 if (PyString_GET_SIZE(self) == 0)
2008 return PyInt_FromLong(0);
2009
2010 e = p + PyString_GET_SIZE(self);
2011 for (; p < e; p++) {
2012 if (!isalnum(*p))
2013 return PyInt_FromLong(0);
2014 }
2015 return PyInt_FromLong(1);
2016}
2017
2018
Guido van Rossum4c08d552000-03-10 22:55:18 +00002019static char isdigit__doc__[] =
2020"S.isdigit() -> int\n\
2021\n\
2022Return 1 if there are only digit characters in S,\n\
20230 otherwise.";
2024
2025static PyObject*
2026string_isdigit(PyStringObject *self, PyObject *args)
2027{
Fred Drakeba096332000-07-09 07:04:36 +00002028 register const unsigned char *p
2029 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002030 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002031
2032 if (!PyArg_NoArgs(args))
2033 return NULL;
2034
2035 /* Shortcut for single character strings */
2036 if (PyString_GET_SIZE(self) == 1 &&
2037 isdigit(*p))
2038 return PyInt_FromLong(1);
2039
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002040 /* Special case for empty strings */
2041 if (PyString_GET_SIZE(self) == 0)
2042 return PyInt_FromLong(0);
2043
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 e = p + PyString_GET_SIZE(self);
2045 for (; p < e; p++) {
2046 if (!isdigit(*p))
2047 return PyInt_FromLong(0);
2048 }
2049 return PyInt_FromLong(1);
2050}
2051
2052
2053static char islower__doc__[] =
2054"S.islower() -> int\n\
2055\n\
2056Return 1 if all cased characters in S are lowercase and there is\n\
2057at least one cased character in S, 0 otherwise.";
2058
2059static PyObject*
2060string_islower(PyStringObject *self, PyObject *args)
2061{
Fred Drakeba096332000-07-09 07:04:36 +00002062 register const unsigned char *p
2063 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002064 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065 int cased;
2066
2067 if (!PyArg_NoArgs(args))
2068 return NULL;
2069
2070 /* Shortcut for single character strings */
2071 if (PyString_GET_SIZE(self) == 1)
2072 return PyInt_FromLong(islower(*p) != 0);
2073
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002074 /* Special case for empty strings */
2075 if (PyString_GET_SIZE(self) == 0)
2076 return PyInt_FromLong(0);
2077
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 e = p + PyString_GET_SIZE(self);
2079 cased = 0;
2080 for (; p < e; p++) {
2081 if (isupper(*p))
2082 return PyInt_FromLong(0);
2083 else if (!cased && islower(*p))
2084 cased = 1;
2085 }
2086 return PyInt_FromLong(cased);
2087}
2088
2089
2090static char isupper__doc__[] =
2091"S.isupper() -> int\n\
2092\n\
2093Return 1 if all cased characters in S are uppercase and there is\n\
2094at least one cased character in S, 0 otherwise.";
2095
2096static PyObject*
2097string_isupper(PyStringObject *self, PyObject *args)
2098{
Fred Drakeba096332000-07-09 07:04:36 +00002099 register const unsigned char *p
2100 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002101 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002102 int cased;
2103
2104 if (!PyArg_NoArgs(args))
2105 return NULL;
2106
2107 /* Shortcut for single character strings */
2108 if (PyString_GET_SIZE(self) == 1)
2109 return PyInt_FromLong(isupper(*p) != 0);
2110
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002111 /* Special case for empty strings */
2112 if (PyString_GET_SIZE(self) == 0)
2113 return PyInt_FromLong(0);
2114
Guido van Rossum4c08d552000-03-10 22:55:18 +00002115 e = p + PyString_GET_SIZE(self);
2116 cased = 0;
2117 for (; p < e; p++) {
2118 if (islower(*p))
2119 return PyInt_FromLong(0);
2120 else if (!cased && isupper(*p))
2121 cased = 1;
2122 }
2123 return PyInt_FromLong(cased);
2124}
2125
2126
2127static char istitle__doc__[] =
2128"S.istitle() -> int\n\
2129\n\
2130Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2131may only follow uncased characters and lowercase characters only cased\n\
2132ones. Return 0 otherwise.";
2133
2134static PyObject*
2135string_istitle(PyStringObject *self, PyObject *args)
2136{
Fred Drakeba096332000-07-09 07:04:36 +00002137 register const unsigned char *p
2138 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002139 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002140 int cased, previous_is_cased;
2141
2142 if (!PyArg_NoArgs(args))
2143 return NULL;
2144
2145 /* Shortcut for single character strings */
2146 if (PyString_GET_SIZE(self) == 1)
2147 return PyInt_FromLong(isupper(*p) != 0);
2148
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002149 /* Special case for empty strings */
2150 if (PyString_GET_SIZE(self) == 0)
2151 return PyInt_FromLong(0);
2152
Guido van Rossum4c08d552000-03-10 22:55:18 +00002153 e = p + PyString_GET_SIZE(self);
2154 cased = 0;
2155 previous_is_cased = 0;
2156 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002157 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158
2159 if (isupper(ch)) {
2160 if (previous_is_cased)
2161 return PyInt_FromLong(0);
2162 previous_is_cased = 1;
2163 cased = 1;
2164 }
2165 else if (islower(ch)) {
2166 if (!previous_is_cased)
2167 return PyInt_FromLong(0);
2168 previous_is_cased = 1;
2169 cased = 1;
2170 }
2171 else
2172 previous_is_cased = 0;
2173 }
2174 return PyInt_FromLong(cased);
2175}
2176
2177
2178static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002179"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002180\n\
2181Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002182Line breaks are not included in the resulting list unless keepends\n\
2183is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184
2185#define SPLIT_APPEND(data, left, right) \
2186 str = PyString_FromStringAndSize(data + left, right - left); \
2187 if (!str) \
2188 goto onError; \
2189 if (PyList_Append(list, str)) { \
2190 Py_DECREF(str); \
2191 goto onError; \
2192 } \
2193 else \
2194 Py_DECREF(str);
2195
2196static PyObject*
2197string_splitlines(PyStringObject *self, PyObject *args)
2198{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199 register int i;
2200 register int j;
2201 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002202 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 PyObject *list;
2204 PyObject *str;
2205 char *data;
2206
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002207 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002208 return NULL;
2209
2210 data = PyString_AS_STRING(self);
2211 len = PyString_GET_SIZE(self);
2212
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213 list = PyList_New(0);
2214 if (!list)
2215 goto onError;
2216
2217 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002218 int eol;
2219
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220 /* Find a line and append it */
2221 while (i < len && data[i] != '\n' && data[i] != '\r')
2222 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002223
2224 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002225 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 if (i < len) {
2227 if (data[i] == '\r' && i + 1 < len &&
2228 data[i+1] == '\n')
2229 i += 2;
2230 else
2231 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002232 if (keepends)
2233 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002234 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002235 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 j = i;
2237 }
2238 if (j < len) {
2239 SPLIT_APPEND(data, j, len);
2240 }
2241
2242 return list;
2243
2244 onError:
2245 Py_DECREF(list);
2246 return NULL;
2247}
2248
2249#undef SPLIT_APPEND
2250
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251
2252static PyMethodDef
2253string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254 /* Counterparts of the obsolete stropmodule functions; except
2255 string.maketrans(). */
2256 {"join", (PyCFunction)string_join, 1, join__doc__},
2257 {"split", (PyCFunction)string_split, 1, split__doc__},
2258 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2259 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2260 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2261 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2262 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2263 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2264 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002265 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2266 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2268 {"count", (PyCFunction)string_count, 1, count__doc__},
2269 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2270 {"find", (PyCFunction)string_find, 1, find__doc__},
2271 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002273 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2274 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2275 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2276 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2278 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2279 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002280 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2281 {"title", (PyCFunction)string_title, 1, title__doc__},
2282 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2283 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2284 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002285 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2287 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2288#if 0
2289 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2290#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291 {NULL, NULL} /* sentinel */
2292};
2293
2294static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002295string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296{
2297 return Py_FindMethod(string_methods, (PyObject*)s, name);
2298}
2299
2300
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002301PyTypeObject PyString_Type = {
2302 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002303 0,
2304 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002305 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002306 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002307 (destructor)string_dealloc, /*tp_dealloc*/
2308 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002310 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002311 (cmpfunc)string_compare, /*tp_compare*/
2312 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002313 0, /*tp_as_number*/
2314 &string_as_sequence, /*tp_as_sequence*/
2315 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002316 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002317 0, /*tp_call*/
2318 0, /*tp_str*/
2319 0, /*tp_getattro*/
2320 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002321 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002322 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002323 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002324};
2325
2326void
Fred Drakeba096332000-07-09 07:04:36 +00002327PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002328{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002329 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002330 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002331 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002332 if (w == NULL || !PyString_Check(*pv)) {
2333 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002334 *pv = NULL;
2335 return;
2336 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002337 v = string_concat((PyStringObject *) *pv, w);
2338 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002339 *pv = v;
2340}
2341
Guido van Rossum013142a1994-08-30 08:19:36 +00002342void
Fred Drakeba096332000-07-09 07:04:36 +00002343PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002344{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002345 PyString_Concat(pv, w);
2346 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002347}
2348
2349
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002350/* The following function breaks the notion that strings are immutable:
2351 it changes the size of a string. We get away with this only if there
2352 is only one module referencing the object. You can also think of it
2353 as creating a new string object and destroying the old one, only
2354 more efficiently. In any case, don't use this if the string may
2355 already be known to some other part of the code... */
2356
2357int
Fred Drakeba096332000-07-09 07:04:36 +00002358_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002359{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002360 register PyObject *v;
2361 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002362 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002363 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002364 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002365 Py_DECREF(v);
2366 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002367 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002368 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002369 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002370#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002371 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002372#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002373 _Py_ForgetReference(v);
2374 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002375 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002376 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002377 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002378 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002379 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002380 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002381 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002382 _Py_NewReference(*pv);
2383 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002384 sv->ob_size = newsize;
2385 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002386 return 0;
2387}
Guido van Rossume5372401993-03-16 12:15:04 +00002388
2389/* Helpers for formatstring */
2390
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002391static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002392getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002393{
2394 int argidx = *p_argidx;
2395 if (argidx < arglen) {
2396 (*p_argidx)++;
2397 if (arglen < 0)
2398 return args;
2399 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002400 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002401 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002402 PyErr_SetString(PyExc_TypeError,
2403 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002404 return NULL;
2405}
2406
/* Bit flags describing a conversion specification; combined with '|'
   and tested with '&'.  F_ALT selects the '#' (alternate) form in
   formatfloat() and formatint().  The remaining names presumably map to
   the '-', '+', ' ' and '0' printf-style flags -- confirm against
   PyString_Format(). */
#define F_LJUST (1 << 0)
#define F_SIGN  (1 << 1)
#define F_BLANK (1 << 2)
#define F_ALT   (1 << 3)
#define F_ZERO  (1 << 4)
2412
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002413static int
Fred Drakeba096332000-07-09 07:04:36 +00002414formatfloat(char *buf, size_t buflen, int flags,
2415 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002416{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002417 /* fmt = '%#.' + `prec` + `type`
2418 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002419 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002420 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002421 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002422 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002423 if (prec < 0)
2424 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002425 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2426 type = 'g';
2427 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002428 /* worst case length calc to ensure no buffer overrun:
2429 fmt = %#.<prec>g
2430 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2431 for any double rep.)
2432 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2433 If prec=0 the effective precision is 1 (the leading digit is
2434 always given), therefore increase by one to 10+prec. */
2435 if (buflen <= (size_t)10 + (size_t)prec) {
2436 PyErr_SetString(PyExc_OverflowError,
2437 "formatted float is too long (precision too long?)");
2438 return -1;
2439 }
Guido van Rossume5372401993-03-16 12:15:04 +00002440 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002441 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002442}
2443
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002444static int
Fred Drakeba096332000-07-09 07:04:36 +00002445formatint(char *buf, size_t buflen, int flags,
2446 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002447{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002448 /* fmt = '%#.' + `prec` + 'l' + `type`
2449 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002450 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002451 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002452 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002453 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002454 if (prec < 0)
2455 prec = 1;
2456 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002457 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2458 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2459 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2460 PyErr_SetString(PyExc_OverflowError,
2461 "formatted integer is too long (precision too long?)");
2462 return -1;
2463 }
Guido van Rossume5372401993-03-16 12:15:04 +00002464 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002465 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002466}
2467
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002468static int
Fred Drakeba096332000-07-09 07:04:36 +00002469formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002470{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002471 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002472 if (PyString_Check(v)) {
2473 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002474 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002475 }
2476 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002477 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002478 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002479 }
2480 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002481 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002482}
2483
Guido van Rossum013142a1994-08-30 08:19:36 +00002484
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every
   formatting routine checks its bounds against it so that nothing
   overflows, but a better solution may be to malloc a buffer of the
   appropriate size for each format.  For now, the current solution is
   sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002494
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002495PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002496PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002497{
2498 char *fmt, *res;
2499 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002500 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002501 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002502 PyObject *dict = NULL;
2503 if (format == NULL || !PyString_Check(format) || args == NULL) {
2504 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002505 return NULL;
2506 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002507 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002508 fmt = PyString_AsString(format);
2509 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002510 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002511 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002512 if (result == NULL)
2513 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002514 res = PyString_AsString(result);
2515 if (PyTuple_Check(args)) {
2516 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002517 argidx = 0;
2518 }
2519 else {
2520 arglen = -1;
2521 argidx = -2;
2522 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002523 if (args->ob_type->tp_as_mapping)
2524 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002525 while (--fmtcnt >= 0) {
2526 if (*fmt != '%') {
2527 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002528 rescnt = fmtcnt + 100;
2529 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002530 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002531 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002532 res = PyString_AsString(result)
2533 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002534 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002535 }
2536 *res++ = *fmt++;
2537 }
2538 else {
2539 /* Got a format specifier */
2540 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002541 int width = -1;
2542 int prec = -1;
2543 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002544 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002545 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002546 PyObject *v = NULL;
2547 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002548 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002549 int sign;
2550 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002551 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002552 char *fmt_start = fmt;
2553
Guido van Rossumda9c2711996-12-05 21:58:58 +00002554 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002555 if (*fmt == '(') {
2556 char *keystart;
2557 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002558 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002559 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002560
2561 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002562 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002563 "format requires a mapping");
2564 goto error;
2565 }
2566 ++fmt;
2567 --fmtcnt;
2568 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002569 /* Skip over balanced parentheses */
2570 while (pcount > 0 && --fmtcnt >= 0) {
2571 if (*fmt == ')')
2572 --pcount;
2573 else if (*fmt == '(')
2574 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002575 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002576 }
2577 keylen = fmt - keystart - 1;
2578 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002579 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002580 "incomplete format key");
2581 goto error;
2582 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002583 key = PyString_FromStringAndSize(keystart,
2584 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002585 if (key == NULL)
2586 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002587 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002588 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002589 args_owned = 0;
2590 }
2591 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002592 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002593 if (args == NULL) {
2594 goto error;
2595 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002596 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002597 arglen = -1;
2598 argidx = -2;
2599 }
Guido van Rossume5372401993-03-16 12:15:04 +00002600 while (--fmtcnt >= 0) {
2601 switch (c = *fmt++) {
2602 case '-': flags |= F_LJUST; continue;
2603 case '+': flags |= F_SIGN; continue;
2604 case ' ': flags |= F_BLANK; continue;
2605 case '#': flags |= F_ALT; continue;
2606 case '0': flags |= F_ZERO; continue;
2607 }
2608 break;
2609 }
2610 if (c == '*') {
2611 v = getnextarg(args, arglen, &argidx);
2612 if (v == NULL)
2613 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002614 if (!PyInt_Check(v)) {
2615 PyErr_SetString(PyExc_TypeError,
2616 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002617 goto error;
2618 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002619 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002620 if (width < 0) {
2621 flags |= F_LJUST;
2622 width = -width;
2623 }
Guido van Rossume5372401993-03-16 12:15:04 +00002624 if (--fmtcnt >= 0)
2625 c = *fmt++;
2626 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002627 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002628 width = c - '0';
2629 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002630 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002631 if (!isdigit(c))
2632 break;
2633 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002634 PyErr_SetString(
2635 PyExc_ValueError,
2636 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002637 goto error;
2638 }
2639 width = width*10 + (c - '0');
2640 }
2641 }
2642 if (c == '.') {
2643 prec = 0;
2644 if (--fmtcnt >= 0)
2645 c = *fmt++;
2646 if (c == '*') {
2647 v = getnextarg(args, arglen, &argidx);
2648 if (v == NULL)
2649 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002650 if (!PyInt_Check(v)) {
2651 PyErr_SetString(
2652 PyExc_TypeError,
2653 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002654 goto error;
2655 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002656 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002657 if (prec < 0)
2658 prec = 0;
2659 if (--fmtcnt >= 0)
2660 c = *fmt++;
2661 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002662 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002663 prec = c - '0';
2664 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002665 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002666 if (!isdigit(c))
2667 break;
2668 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002669 PyErr_SetString(
2670 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002671 "prec too big");
2672 goto error;
2673 }
2674 prec = prec*10 + (c - '0');
2675 }
2676 }
2677 } /* prec */
2678 if (fmtcnt >= 0) {
2679 if (c == 'h' || c == 'l' || c == 'L') {
2680 size = c;
2681 if (--fmtcnt >= 0)
2682 c = *fmt++;
2683 }
2684 }
2685 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002686 PyErr_SetString(PyExc_ValueError,
2687 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002688 goto error;
2689 }
2690 if (c != '%') {
2691 v = getnextarg(args, arglen, &argidx);
2692 if (v == NULL)
2693 goto error;
2694 }
2695 sign = 0;
2696 fill = ' ';
2697 switch (c) {
2698 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002699 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002700 len = 1;
2701 break;
2702 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002703 case 'r':
2704 if (PyUnicode_Check(v)) {
2705 fmt = fmt_start;
2706 goto unicode;
2707 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002708 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002709 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002710 else
2711 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002712 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002713 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002714 if (!PyString_Check(temp)) {
2715 PyErr_SetString(PyExc_TypeError,
2716 "%s argument has non-string str()");
2717 goto error;
2718 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002719 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002720 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002721 if (prec >= 0 && len > prec)
2722 len = prec;
2723 break;
2724 case 'i':
2725 case 'd':
2726 case 'u':
2727 case 'o':
2728 case 'x':
2729 case 'X':
2730 if (c == 'i')
2731 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002732 pbuf = formatbuf;
2733 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002734 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002735 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002736 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002737 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002738 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002739 if ((flags&F_ALT) &&
2740 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002741 pbuf[0] == '0' && pbuf[1] == c) {
2742 *res++ = *pbuf++;
2743 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002744 rescnt -= 2;
2745 len -= 2;
2746 width -= 2;
2747 if (width < 0)
2748 width = 0;
2749 }
2750 }
Guido van Rossume5372401993-03-16 12:15:04 +00002751 break;
2752 case 'e':
2753 case 'E':
2754 case 'f':
2755 case 'g':
2756 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002757 pbuf = formatbuf;
2758 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002759 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002760 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002761 sign = 1;
2762 if (flags&F_ZERO)
2763 fill = '0';
2764 break;
2765 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002766 pbuf = formatbuf;
2767 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002768 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002769 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002770 break;
2771 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002772 PyErr_Format(PyExc_ValueError,
2773 "unsupported format character '%c' (0x%x)",
2774 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002775 goto error;
2776 }
2777 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002778 if (*pbuf == '-' || *pbuf == '+') {
2779 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002780 len--;
2781 }
2782 else if (flags & F_SIGN)
2783 sign = '+';
2784 else if (flags & F_BLANK)
2785 sign = ' ';
2786 else
2787 sign = '\0';
2788 }
2789 if (width < len)
2790 width = len;
2791 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002792 reslen -= rescnt;
2793 rescnt = width + fmtcnt + 100;
2794 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002795 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002796 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002797 res = PyString_AsString(result)
2798 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002799 }
2800 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002801 if (fill != ' ')
2802 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002803 rescnt--;
2804 if (width > len)
2805 width--;
2806 }
2807 if (width > len && !(flags&F_LJUST)) {
2808 do {
2809 --rescnt;
2810 *res++ = fill;
2811 } while (--width > len);
2812 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002813 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002814 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002815 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002816 res += len;
2817 rescnt -= len;
2818 while (--width >= len) {
2819 --rescnt;
2820 *res++ = ' ';
2821 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002822 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002823 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002824 "not all arguments converted");
2825 goto error;
2826 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002827 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002828 } /* '%' */
2829 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002830 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002831 PyErr_SetString(PyExc_TypeError,
2832 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002833 goto error;
2834 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002835 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002836 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002837 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002838 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002839 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002840
2841 unicode:
2842 if (args_owned) {
2843 Py_DECREF(args);
2844 args_owned = 0;
2845 }
2846 /* Fiddle args right (remove the first argidx-1 arguments) */
2847 --argidx;
2848 if (PyTuple_Check(orig_args) && argidx > 0) {
2849 PyObject *v;
2850 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2851 v = PyTuple_New(n);
2852 if (v == NULL)
2853 goto error;
2854 while (--n >= 0) {
2855 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2856 Py_INCREF(w);
2857 PyTuple_SET_ITEM(v, n, w);
2858 }
2859 args = v;
2860 } else {
2861 Py_INCREF(orig_args);
2862 args = orig_args;
2863 }
2864 /* Paste rest of format string to what we have of the result
2865 string; we reuse result for this */
2866 rescnt = res - PyString_AS_STRING(result);
2867 fmtcnt = PyString_GET_SIZE(format) - \
2868 (fmt - PyString_AS_STRING(format));
2869 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2870 Py_DECREF(args);
2871 goto error;
2872 }
2873 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2874 format = result;
2875 /* Let Unicode do its magic */
2876 result = PyUnicode_Format(format, args);
2877 Py_DECREF(format);
2878 Py_DECREF(args);
2879 return result;
2880
Guido van Rossume5372401993-03-16 12:15:04 +00002881 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002882 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002883 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002884 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002885 }
Guido van Rossume5372401993-03-16 12:15:04 +00002886 return NULL;
2887}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002888
2889
2890#ifdef INTERN_STRINGS
2891
2892static PyObject *interned;
2893
2894void
Fred Drakeba096332000-07-09 07:04:36 +00002895PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002896{
2897 register PyStringObject *s = (PyStringObject *)(*p);
2898 PyObject *t;
2899 if (s == NULL || !PyString_Check(s))
2900 Py_FatalError("PyString_InternInPlace: strings only please!");
2901 if ((t = s->ob_sinterned) != NULL) {
2902 if (t == (PyObject *)s)
2903 return;
2904 Py_INCREF(t);
2905 *p = t;
2906 Py_DECREF(s);
2907 return;
2908 }
2909 if (interned == NULL) {
2910 interned = PyDict_New();
2911 if (interned == NULL)
2912 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002913 }
2914 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2915 Py_INCREF(t);
2916 *p = s->ob_sinterned = t;
2917 Py_DECREF(s);
2918 return;
2919 }
2920 t = (PyObject *)s;
2921 if (PyDict_SetItem(interned, t, t) == 0) {
2922 s->ob_sinterned = t;
2923 return;
2924 }
2925 PyErr_Clear();
2926}
2927
2928
2929PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002930PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00002931{
2932 PyObject *s = PyString_FromString(cp);
2933 if (s == NULL)
2934 return NULL;
2935 PyString_InternInPlace(&s);
2936 return s;
2937}
2938
2939#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002940
2941void
Fred Drakeba096332000-07-09 07:04:36 +00002942PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00002943{
2944 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002945 for (i = 0; i < UCHAR_MAX + 1; i++) {
2946 Py_XDECREF(characters[i]);
2947 characters[i] = NULL;
2948 }
2949#ifndef DONT_SHARE_SHORT_STRINGS
2950 Py_XDECREF(nullstring);
2951 nullstring = NULL;
2952#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002953#ifdef INTERN_STRINGS
2954 if (interned) {
2955 int pos, changed;
2956 PyObject *key, *value;
2957 do {
2958 changed = 0;
2959 pos = 0;
2960 while (PyDict_Next(interned, &pos, &key, &value)) {
2961 if (key->ob_refcnt == 2 && key == value) {
2962 PyDict_DelItem(interned, key);
2963 changed = 1;
2964 }
2965 }
2966 } while (changed);
2967 }
2968#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002969}