blob: 5fe5b6517a786aeb91f232fa8a664b6542ab8740 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum71160aa1997-06-03 18:03:18 +000036#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000037#include <ctype.h>
38
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time the shared empty string
   (null_strings) or a cached one-character string (one_strings) is
   handed out instead of allocating a new object. */
int null_strings;
int one_strings;
#endif
42
Guido van Rossum03093a21994-09-28 15:51:32 +000043#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000044#include <limits.h>
45#else
46#ifndef UCHAR_MAX
47#define UCHAR_MAX 255
48#endif
49#endif
50
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000052#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071PyObject *
72PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000073 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074 int size;
75{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000077#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
92 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000095
96 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000098 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000099 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000101 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102#ifdef CACHE_HASH
103 op->ob_shash = -1;
104#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000105#ifdef INTERN_STRINGS
106 op->ob_sinterned = NULL;
107#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000108 if (str != NULL)
109 memcpy(op->ob_sval, str, size);
110 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0) {
113 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 } else if (size == 1 && str != NULL) {
116 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121}
122
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123PyObject *
124PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000125 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000127 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000129 if (size > INT_MAX) {
130 PyErr_SetString(PyExc_OverflowError,
131 "string is too long for a Python string");
132 return NULL;
133 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000134#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 if (size == 0 && (op = nullstring) != NULL) {
136#ifdef COUNT_ALLOCS
137 null_strings++;
138#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000139 Py_INCREF(op);
140 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 }
142 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
143#ifdef COUNT_ALLOCS
144 one_strings++;
145#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
147 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000149#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000150
151 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000153 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000154 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000156 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000157#ifdef CACHE_HASH
158 op->ob_shash = -1;
159#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000160#ifdef INTERN_STRINGS
161 op->ob_sinterned = NULL;
162#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000163 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000164#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000165 if (size == 0) {
166 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000167 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000168 } else if (size == 1) {
169 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000170 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000171 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000172#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000173 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000174}
175
Guido van Rossum234f9421993-06-17 12:35:49 +0000176static void
Guido van Rossume5372401993-03-16 12:15:04 +0000177string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000178 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000179{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000180 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000181}
182
Guido van Rossumd7047b31995-01-02 19:07:15 +0000183int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000184PyString_Size(op)
185 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000186{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000187 if (!PyString_Check(op)) {
188 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000189 return -1;
190 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000191 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192}
193
194/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000195PyString_AsString(op)
196 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000197{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000198 if (!PyString_Check(op)) {
199 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000200 return NULL;
201 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000202 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000203}
204
205/* Methods */
206
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000207static int
Guido van Rossume5372401993-03-16 12:15:04 +0000208string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000209 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 FILE *fp;
211 int flags;
212{
213 int i;
214 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000215 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000216 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000217 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000218 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000219 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000220 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000221
222 /* figure out which quote to use; single is prefered */
223 quote = '\'';
224 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
225 quote = '"';
226
227 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000228 for (i = 0; i < op->ob_size; i++) {
229 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 fprintf(fp, "\\%c", c);
232 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000233 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000235 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000236 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000237 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000238 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239}
240
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000241static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000242string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000243 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000244{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000245 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
246 PyObject *v;
247 if (newsize > INT_MAX) {
248 PyErr_SetString(PyExc_OverflowError,
249 "string is too large to make repr");
250 }
251 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000252 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000253 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000254 }
255 else {
256 register int i;
257 register char c;
258 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000259 int quote;
260
261 /* figure out which quote to use; single is prefered */
262 quote = '\'';
263 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
264 quote = '"';
265
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000266 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000267 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268 for (i = 0; i < op->ob_size; i++) {
269 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000270 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000271 *p++ = '\\', *p++ = c;
272 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000273 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000274 while (*p != '\0')
275 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 }
277 else
278 *p++ = c;
279 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000280 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000281 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000282 _PyString_Resize(
283 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000284 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000285 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000286}
287
288static int
Guido van Rossume5372401993-03-16 12:15:04 +0000289string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000290 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291{
292 return a->ob_size;
293}
294
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000295static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000296string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000297 register PyStringObject *a;
298 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000299{
300 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000301 register PyStringObject *op;
302 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000303 if (PyUnicode_Check(bb))
304 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000305 PyErr_Format(PyExc_TypeError,
306 "cannot add type \"%.200s\" to string",
307 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000308 return NULL;
309 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000310#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000311 /* Optimize cases with empty left or right operand */
312 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000313 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314 return bb;
315 }
316 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000317 Py_INCREF(a);
318 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000319 }
320 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000321 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000322 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000323 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000324 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000326 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000327#ifdef CACHE_HASH
328 op->ob_shash = -1;
329#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000330#ifdef INTERN_STRINGS
331 op->ob_sinterned = NULL;
332#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000333 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
334 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
335 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000336 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337#undef b
338}
339
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000340static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000341string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000342 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343 register int n;
344{
345 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000346 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 if (n < 0)
349 n = 0;
350 size = a->ob_size * n;
351 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000352 Py_INCREF(a);
353 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000354 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000355 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000356 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000357 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000358 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000359 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000360 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000361#ifdef CACHE_HASH
362 op->ob_shash = -1;
363#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000364#ifdef INTERN_STRINGS
365 op->ob_sinterned = NULL;
366#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000367 for (i = 0; i < size; i += a->ob_size)
368 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
369 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000370 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371}
372
373/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
374
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000375static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000376string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000378 register int i, j; /* May be negative! */
379{
380 if (i < 0)
381 i = 0;
382 if (j < 0)
383 j = 0; /* Avoid signed/unsigned bug in next line */
384 if (j > a->ob_size)
385 j = a->ob_size;
386 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000387 Py_INCREF(a);
388 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000389 }
390 if (j < i)
391 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000392 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000393}
394
Guido van Rossum9284a572000-03-07 15:53:43 +0000395static int
396string_contains(a, el)
397PyObject *a, *el;
398{
399 register char *s, *end;
400 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000401 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000402 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000403 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000404 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000405 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000406 return -1;
407 }
408 c = PyString_AsString(el)[0];
409 s = PyString_AsString(a);
410 end = s + PyString_Size(a);
411 while (s < end) {
412 if (c == *s++)
413 return 1;
414 }
415 return 0;
416}
417
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000418static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000419string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000420 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000421 register int i;
422{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000423 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000424 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000425 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000426 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427 return NULL;
428 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000429 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000431#ifdef COUNT_ALLOCS
432 if (v != NULL)
433 one_strings++;
434#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000435 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000436 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000437 if (v == NULL)
438 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 characters[c] = (PyStringObject *) v;
440 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000441 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000443 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444}
445
446static int
Guido van Rossume5372401993-03-16 12:15:04 +0000447string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000448 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449{
Guido van Rossum253919f1991-02-13 23:18:39 +0000450 int len_a = a->ob_size, len_b = b->ob_size;
451 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000452 int cmp;
453 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000454 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000455 if (cmp == 0)
456 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
457 if (cmp != 0)
458 return cmp;
459 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000460 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000461}
462
Guido van Rossum9bfef441993-03-29 10:43:31 +0000463static long
464string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000465 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000466{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000467 register int len;
468 register unsigned char *p;
469 register long x;
470
471#ifdef CACHE_HASH
472 if (a->ob_shash != -1)
473 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000474#ifdef INTERN_STRINGS
475 if (a->ob_sinterned != NULL)
476 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000477 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000478#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000479#endif
480 len = a->ob_size;
481 p = (unsigned char *) a->ob_sval;
482 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000483 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000484 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000485 x ^= a->ob_size;
486 if (x == -1)
487 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000488#ifdef CACHE_HASH
489 a->ob_shash = x;
490#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000491 return x;
492}
493
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000494static int
495string_buffer_getreadbuf(self, index, ptr)
496 PyStringObject *self;
497 int index;
498 const void **ptr;
499{
500 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000501 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000502 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000503 return -1;
504 }
505 *ptr = (void *)self->ob_sval;
506 return self->ob_size;
507}
508
509static int
510string_buffer_getwritebuf(self, index, ptr)
511 PyStringObject *self;
512 int index;
513 const void **ptr;
514{
Guido van Rossum045e6881997-09-08 18:30:11 +0000515 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000516 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000517 return -1;
518}
519
520static int
521string_buffer_getsegcount(self, lenp)
522 PyStringObject *self;
523 int *lenp;
524{
525 if ( lenp )
526 *lenp = self->ob_size;
527 return 1;
528}
529
Guido van Rossum1db70701998-10-08 02:18:52 +0000530static int
531string_buffer_getcharbuf(self, index, ptr)
532 PyStringObject *self;
533 int index;
534 const char **ptr;
535{
536 if ( index != 0 ) {
537 PyErr_SetString(PyExc_SystemError,
538 "accessing non-existent string segment");
539 return -1;
540 }
541 *ptr = self->ob_sval;
542 return self->ob_size;
543}
544
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000545static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000546 (inquiry)string_length, /*sq_length*/
547 (binaryfunc)string_concat, /*sq_concat*/
548 (intargfunc)string_repeat, /*sq_repeat*/
549 (intargfunc)string_item, /*sq_item*/
550 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000551 0, /*sq_ass_item*/
552 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000553 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000554};
555
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000556static PyBufferProcs string_as_buffer = {
557 (getreadbufferproc)string_buffer_getreadbuf,
558 (getwritebufferproc)string_buffer_getwritebuf,
559 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000560 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000561};
562
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000563
564
/* Mode selectors; presumably they choose which end(s) of a string a
   strip operation trims -- their users are outside this part of the
   file, TODO confirm. */
#define LEFTSTRIP	0
#define RIGHTSTRIP	1
#define BOTHSTRIP	2
568
569
570static PyObject *
571split_whitespace(s, len, maxsplit)
572 char *s;
573 int len;
574 int maxsplit;
575{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000576 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000577 PyObject* item;
578 PyObject *list = PyList_New(0);
579
580 if (list == NULL)
581 return NULL;
582
Guido van Rossum4c08d552000-03-10 22:55:18 +0000583 for (i = j = 0; i < len; ) {
584 while (i < len && isspace(Py_CHARMASK(s[i])))
585 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000586 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000587 while (i < len && !isspace(Py_CHARMASK(s[i])))
588 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000589 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000590 if (maxsplit-- <= 0)
591 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000592 item = PyString_FromStringAndSize(s+j, (int)(i-j));
593 if (item == NULL)
594 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000595 err = PyList_Append(list, item);
596 Py_DECREF(item);
597 if (err < 0)
598 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000599 while (i < len && isspace(Py_CHARMASK(s[i])))
600 i++;
601 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000602 }
603 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000604 if (j < len) {
605 item = PyString_FromStringAndSize(s+j, (int)(len - j));
606 if (item == NULL)
607 goto finally;
608 err = PyList_Append(list, item);
609 Py_DECREF(item);
610 if (err < 0)
611 goto finally;
612 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000613 return list;
614 finally:
615 Py_DECREF(list);
616 return NULL;
617}
618
619
/* Documentation string for the split() string method. */
static char split__doc__[] =
"S.split([sep [,maxsplit]]) -> list of strings\n"
"\n"
"Return a list of the words in the string S, using sep as the\n"
"delimiter string. If maxsplit is given, at most maxsplit\n"
"splits are done. If sep is not specified, any whitespace string\n"
"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000627
628static PyObject *
629string_split(self, args)
630 PyStringObject *self;
631 PyObject *args;
632{
633 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000634 int maxsplit = -1;
635 const char *s = PyString_AS_STRING(self), *sub;
636 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000637
Guido van Rossum4c08d552000-03-10 22:55:18 +0000638 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000639 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000640 if (maxsplit < 0)
641 maxsplit = INT_MAX;
642 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000643 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000644 if (PyString_Check(subobj)) {
645 sub = PyString_AS_STRING(subobj);
646 n = PyString_GET_SIZE(subobj);
647 }
648 else if (PyUnicode_Check(subobj))
649 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
650 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
651 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000652 if (n == 0) {
653 PyErr_SetString(PyExc_ValueError, "empty separator");
654 return NULL;
655 }
656
657 list = PyList_New(0);
658 if (list == NULL)
659 return NULL;
660
661 i = j = 0;
662 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000663 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000664 if (maxsplit-- <= 0)
665 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000666 item = PyString_FromStringAndSize(s+j, (int)(i-j));
667 if (item == NULL)
668 goto fail;
669 err = PyList_Append(list, item);
670 Py_DECREF(item);
671 if (err < 0)
672 goto fail;
673 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000674 }
675 else
676 i++;
677 }
678 item = PyString_FromStringAndSize(s+j, (int)(len-j));
679 if (item == NULL)
680 goto fail;
681 err = PyList_Append(list, item);
682 Py_DECREF(item);
683 if (err < 0)
684 goto fail;
685
686 return list;
687
688 fail:
689 Py_DECREF(list);
690 return NULL;
691}
692
693
/* Documentation string for the join() string method. */
static char join__doc__[] =
"S.join(sequence) -> string\n"
"\n"
"Return a string which is the concatenation of the strings in the\n"
"sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000699
700static PyObject *
701string_join(self, args)
702 PyStringObject *self;
703 PyObject *args;
704{
705 char *sep = PyString_AS_STRING(self);
706 int seplen = PyString_GET_SIZE(self);
707 PyObject *res = NULL;
708 int reslen = 0;
709 char *p;
710 int seqlen = 0;
711 int sz = 100;
712 int i, slen;
713 PyObject *seq;
714
Guido van Rossum43713e52000-02-29 13:59:29 +0000715 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000716 return NULL;
717
718 seqlen = PySequence_Length(seq);
719 if (seqlen < 0 && PyErr_Occurred())
720 return NULL;
721
722 if (seqlen == 1) {
723 /* Optimization if there's only one item */
724 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000725 if (item == NULL)
726 return NULL;
727 if (!PyString_Check(item) &&
728 !PyUnicode_Check(item)) {
729 PyErr_SetString(PyExc_TypeError,
730 "first argument must be sequence of strings");
731 Py_DECREF(item);
732 return NULL;
733 }
734 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000735 }
736 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
737 return NULL;
738 p = PyString_AsString(res);
739
740 /* optimize for lists. all others (tuples and arbitrary sequences)
741 * just use the abstract interface.
742 */
743 if (PyList_Check(seq)) {
744 for (i = 0; i < seqlen; i++) {
745 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000746 if (!PyString_Check(item)){
747 if (PyUnicode_Check(item)) {
748 Py_DECREF(res);
749 return PyUnicode_Join(
750 (PyObject *)self,
751 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000752 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000753 PyErr_Format(PyExc_TypeError,
754 "sequence item %i not a string",
755 i);
756 goto finally;
757 }
758 slen = PyString_GET_SIZE(item);
759 while (reslen + slen + seplen >= sz) {
760 if (_PyString_Resize(&res, sz*2))
761 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 sz *= 2;
763 p = PyString_AsString(res) + reslen;
764 }
765 if (i > 0) {
766 memcpy(p, sep, seplen);
767 p += seplen;
768 reslen += seplen;
769 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000770 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000771 p += slen;
772 reslen += slen;
773 }
774 }
775 else {
776 for (i = 0; i < seqlen; i++) {
777 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000778 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000779 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000780 if (!PyString_Check(item)){
781 if (PyUnicode_Check(item)) {
782 Py_DECREF(res);
783 Py_DECREF(item);
784 return PyUnicode_Join(
785 (PyObject *)self,
786 seq);
787 }
788 Py_DECREF(item);
789 PyErr_Format(PyExc_TypeError,
790 "sequence item %i not a string",
791 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000792 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000793 }
794 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000795 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000796 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000797 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000798 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000799 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000800 sz *= 2;
801 p = PyString_AsString(res) + reslen;
802 }
803 if (i > 0) {
804 memcpy(p, sep, seplen);
805 p += seplen;
806 reslen += seplen;
807 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000808 memcpy(p, PyString_AS_STRING(item), slen);
809 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000810 p += slen;
811 reslen += slen;
812 }
813 }
814 if (_PyString_Resize(&res, reslen))
815 goto finally;
816 return res;
817
818 finally:
819 Py_DECREF(res);
820 return NULL;
821}
822
823
824
825static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000826string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000827 PyStringObject *self;
828 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000829 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000830{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000831 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000832 int len = PyString_GET_SIZE(self);
833 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000834 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000835
Guido van Rossumc6821402000-05-08 14:08:05 +0000836 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
837 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000838 return -2;
839 if (PyString_Check(subobj)) {
840 sub = PyString_AS_STRING(subobj);
841 n = PyString_GET_SIZE(subobj);
842 }
843 else if (PyUnicode_Check(subobj))
844 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
845 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000846 return -2;
847
848 if (last > len)
849 last = len;
850 if (last < 0)
851 last += len;
852 if (last < 0)
853 last = 0;
854 if (i < 0)
855 i += len;
856 if (i < 0)
857 i = 0;
858
Guido van Rossum4c08d552000-03-10 22:55:18 +0000859 if (dir > 0) {
860 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000861 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000862 last -= n;
863 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000864 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000865 return (long)i;
866 }
867 else {
868 int j;
869
870 if (n == 0 && i <= last)
871 return (long)last;
872 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000873 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000874 return (long)j;
875 }
876
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000877 return -1;
878}
879
880
881static char find__doc__[] =
882"S.find(sub [,start [,end]]) -> int\n\
883\n\
884Return the lowest index in S where substring sub is found,\n\
885such that sub is contained within s[start,end]. Optional\n\
886arguments start and end are interpreted as in slice notation.\n\
887\n\
888Return -1 on failure.";
889
890static PyObject *
891string_find(self, args)
892 PyStringObject *self;
893 PyObject *args;
894{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000895 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000896 if (result == -2)
897 return NULL;
898 return PyInt_FromLong(result);
899}
900
901
902static char index__doc__[] =
903"S.index(sub [,start [,end]]) -> int\n\
904\n\
905Like S.find() but raise ValueError when the substring is not found.";
906
907static PyObject *
908string_index(self, args)
909 PyStringObject *self;
910 PyObject *args;
911{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000912 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000913 if (result == -2)
914 return NULL;
915 if (result == -1) {
916 PyErr_SetString(PyExc_ValueError,
917 "substring not found in string.index");
918 return NULL;
919 }
920 return PyInt_FromLong(result);
921}
922
923
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000924static char rfind__doc__[] =
925"S.rfind(sub [,start [,end]]) -> int\n\
926\n\
927Return the highest index in S where substring sub is found,\n\
928such that sub is contained within s[start,end]. Optional\n\
929arguments start and end are interpreted as in slice notation.\n\
930\n\
931Return -1 on failure.";
932
933static PyObject *
934string_rfind(self, args)
935 PyStringObject *self;
936 PyObject *args;
937{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000938 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000939 if (result == -2)
940 return NULL;
941 return PyInt_FromLong(result);
942}
943
944
945static char rindex__doc__[] =
946"S.rindex(sub [,start [,end]]) -> int\n\
947\n\
948Like S.rfind() but raise ValueError when the substring is not found.";
949
950static PyObject *
951string_rindex(self, args)
952 PyStringObject *self;
953 PyObject *args;
954{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000955 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000956 if (result == -2)
957 return NULL;
958 if (result == -1) {
959 PyErr_SetString(PyExc_ValueError,
960 "substring not found in string.rindex");
961 return NULL;
962 }
963 return PyInt_FromLong(result);
964}
965
966
967static PyObject *
968do_strip(self, args, striptype)
969 PyStringObject *self;
970 PyObject *args;
971 int striptype;
972{
973 char *s = PyString_AS_STRING(self);
974 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000975
Guido van Rossum43713e52000-02-29 13:59:29 +0000976 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000977 return NULL;
978
979 i = 0;
980 if (striptype != RIGHTSTRIP) {
981 while (i < len && isspace(Py_CHARMASK(s[i]))) {
982 i++;
983 }
984 }
985
986 j = len;
987 if (striptype != LEFTSTRIP) {
988 do {
989 j--;
990 } while (j >= i && isspace(Py_CHARMASK(s[j])));
991 j++;
992 }
993
994 if (i == 0 && j == len) {
995 Py_INCREF(self);
996 return (PyObject*)self;
997 }
998 else
999 return PyString_FromStringAndSize(s+i, j-i);
1000}
1001
1002
1003static char strip__doc__[] =
1004"S.strip() -> string\n\
1005\n\
1006Return a copy of the string S with leading and trailing\n\
1007whitespace removed.";
1008
1009static PyObject *
1010string_strip(self, args)
1011 PyStringObject *self;
1012 PyObject *args;
1013{
1014 return do_strip(self, args, BOTHSTRIP);
1015}
1016
1017
1018static char lstrip__doc__[] =
1019"S.lstrip() -> string\n\
1020\n\
1021Return a copy of the string S with leading whitespace removed.";
1022
1023static PyObject *
1024string_lstrip(self, args)
1025 PyStringObject *self;
1026 PyObject *args;
1027{
1028 return do_strip(self, args, LEFTSTRIP);
1029}
1030
1031
1032static char rstrip__doc__[] =
1033"S.rstrip() -> string\n\
1034\n\
1035Return a copy of the string S with trailing whitespace removed.";
1036
1037static PyObject *
1038string_rstrip(self, args)
1039 PyStringObject *self;
1040 PyObject *args;
1041{
1042 return do_strip(self, args, RIGHTSTRIP);
1043}
1044
1045
1046static char lower__doc__[] =
1047"S.lower() -> string\n\
1048\n\
1049Return a copy of the string S converted to lowercase.";
1050
1051static PyObject *
1052string_lower(self, args)
1053 PyStringObject *self;
1054 PyObject *args;
1055{
1056 char *s = PyString_AS_STRING(self), *s_new;
1057 int i, n = PyString_GET_SIZE(self);
1058 PyObject *new;
1059
Guido van Rossum43713e52000-02-29 13:59:29 +00001060 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001061 return NULL;
1062 new = PyString_FromStringAndSize(NULL, n);
1063 if (new == NULL)
1064 return NULL;
1065 s_new = PyString_AsString(new);
1066 for (i = 0; i < n; i++) {
1067 int c = Py_CHARMASK(*s++);
1068 if (isupper(c)) {
1069 *s_new = tolower(c);
1070 } else
1071 *s_new = c;
1072 s_new++;
1073 }
1074 return new;
1075}
1076
1077
1078static char upper__doc__[] =
1079"S.upper() -> string\n\
1080\n\
1081Return a copy of the string S converted to uppercase.";
1082
1083static PyObject *
1084string_upper(self, args)
1085 PyStringObject *self;
1086 PyObject *args;
1087{
1088 char *s = PyString_AS_STRING(self), *s_new;
1089 int i, n = PyString_GET_SIZE(self);
1090 PyObject *new;
1091
Guido van Rossum43713e52000-02-29 13:59:29 +00001092 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001093 return NULL;
1094 new = PyString_FromStringAndSize(NULL, n);
1095 if (new == NULL)
1096 return NULL;
1097 s_new = PyString_AsString(new);
1098 for (i = 0; i < n; i++) {
1099 int c = Py_CHARMASK(*s++);
1100 if (islower(c)) {
1101 *s_new = toupper(c);
1102 } else
1103 *s_new = c;
1104 s_new++;
1105 }
1106 return new;
1107}
1108
1109
Guido van Rossum4c08d552000-03-10 22:55:18 +00001110static char title__doc__[] =
1111"S.title() -> string\n\
1112\n\
1113Return a titlecased version of S, i.e. words start with uppercase\n\
1114characters, all remaining cased characters have lowercase.";
1115
1116static PyObject*
1117string_title(PyUnicodeObject *self, PyObject *args)
1118{
1119 char *s = PyString_AS_STRING(self), *s_new;
1120 int i, n = PyString_GET_SIZE(self);
1121 int previous_is_cased = 0;
1122 PyObject *new;
1123
1124 if (!PyArg_ParseTuple(args, ":title"))
1125 return NULL;
1126 new = PyString_FromStringAndSize(NULL, n);
1127 if (new == NULL)
1128 return NULL;
1129 s_new = PyString_AsString(new);
1130 for (i = 0; i < n; i++) {
1131 int c = Py_CHARMASK(*s++);
1132 if (islower(c)) {
1133 if (!previous_is_cased)
1134 c = toupper(c);
1135 previous_is_cased = 1;
1136 } else if (isupper(c)) {
1137 if (previous_is_cased)
1138 c = tolower(c);
1139 previous_is_cased = 1;
1140 } else
1141 previous_is_cased = 0;
1142 *s_new++ = c;
1143 }
1144 return new;
1145}
1146
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001147static char capitalize__doc__[] =
1148"S.capitalize() -> string\n\
1149\n\
1150Return a copy of the string S with only its first character\n\
1151capitalized.";
1152
1153static PyObject *
1154string_capitalize(self, args)
1155 PyStringObject *self;
1156 PyObject *args;
1157{
1158 char *s = PyString_AS_STRING(self), *s_new;
1159 int i, n = PyString_GET_SIZE(self);
1160 PyObject *new;
1161
Guido van Rossum43713e52000-02-29 13:59:29 +00001162 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001163 return NULL;
1164 new = PyString_FromStringAndSize(NULL, n);
1165 if (new == NULL)
1166 return NULL;
1167 s_new = PyString_AsString(new);
1168 if (0 < n) {
1169 int c = Py_CHARMASK(*s++);
1170 if (islower(c))
1171 *s_new = toupper(c);
1172 else
1173 *s_new = c;
1174 s_new++;
1175 }
1176 for (i = 1; i < n; i++) {
1177 int c = Py_CHARMASK(*s++);
1178 if (isupper(c))
1179 *s_new = tolower(c);
1180 else
1181 *s_new = c;
1182 s_new++;
1183 }
1184 return new;
1185}
1186
1187
1188static char count__doc__[] =
1189"S.count(sub[, start[, end]]) -> int\n\
1190\n\
1191Return the number of occurrences of substring sub in string\n\
1192S[start:end]. Optional arguments start and end are\n\
1193interpreted as in slice notation.";
1194
1195static PyObject *
1196string_count(self, args)
1197 PyStringObject *self;
1198 PyObject *args;
1199{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001200 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001201 int len = PyString_GET_SIZE(self), n;
1202 int i = 0, last = INT_MAX;
1203 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001204 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001205
Guido van Rossumc6821402000-05-08 14:08:05 +00001206 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1207 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001208 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001209
Guido van Rossum4c08d552000-03-10 22:55:18 +00001210 if (PyString_Check(subobj)) {
1211 sub = PyString_AS_STRING(subobj);
1212 n = PyString_GET_SIZE(subobj);
1213 }
1214 else if (PyUnicode_Check(subobj))
1215 return PyInt_FromLong(
1216 PyUnicode_Count((PyObject *)self, subobj, i, last));
1217 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1218 return NULL;
1219
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001220 if (last > len)
1221 last = len;
1222 if (last < 0)
1223 last += len;
1224 if (last < 0)
1225 last = 0;
1226 if (i < 0)
1227 i += len;
1228 if (i < 0)
1229 i = 0;
1230 m = last + 1 - n;
1231 if (n == 0)
1232 return PyInt_FromLong((long) (m-i));
1233
1234 r = 0;
1235 while (i < m) {
1236 if (!memcmp(s+i, sub, n)) {
1237 r++;
1238 i += n;
1239 } else {
1240 i++;
1241 }
1242 }
1243 return PyInt_FromLong((long) r);
1244}
1245
1246
1247static char swapcase__doc__[] =
1248"S.swapcase() -> string\n\
1249\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001250Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001251converted to lowercase and vice versa.";
1252
1253static PyObject *
1254string_swapcase(self, args)
1255 PyStringObject *self;
1256 PyObject *args;
1257{
1258 char *s = PyString_AS_STRING(self), *s_new;
1259 int i, n = PyString_GET_SIZE(self);
1260 PyObject *new;
1261
Guido van Rossum43713e52000-02-29 13:59:29 +00001262 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001263 return NULL;
1264 new = PyString_FromStringAndSize(NULL, n);
1265 if (new == NULL)
1266 return NULL;
1267 s_new = PyString_AsString(new);
1268 for (i = 0; i < n; i++) {
1269 int c = Py_CHARMASK(*s++);
1270 if (islower(c)) {
1271 *s_new = toupper(c);
1272 }
1273 else if (isupper(c)) {
1274 *s_new = tolower(c);
1275 }
1276 else
1277 *s_new = c;
1278 s_new++;
1279 }
1280 return new;
1281}
1282
1283
1284static char translate__doc__[] =
1285"S.translate(table [,deletechars]) -> string\n\
1286\n\
1287Return a copy of the string S, where all characters occurring\n\
1288in the optional argument deletechars are removed, and the\n\
1289remaining characters have been mapped through the given\n\
1290translation table, which must be a string of length 256.";
1291
1292static PyObject *
1293string_translate(self, args)
1294 PyStringObject *self;
1295 PyObject *args;
1296{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001297 register char *input, *output;
1298 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 register int i, c, changed = 0;
1300 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302 int inlen, tablen, dellen = 0;
1303 PyObject *result;
1304 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001305 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307 if (!PyArg_ParseTuple(args, "O|O:translate",
1308 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001310
1311 if (PyString_Check(tableobj)) {
1312 table1 = PyString_AS_STRING(tableobj);
1313 tablen = PyString_GET_SIZE(tableobj);
1314 }
1315 else if (PyUnicode_Check(tableobj)) {
1316 /* Unicode .translate() does not support the deletechars
1317 parameter; instead a mapping to None will cause characters
1318 to be deleted. */
1319 if (delobj != NULL) {
1320 PyErr_SetString(PyExc_TypeError,
1321 "deletions are implemented differently for unicode");
1322 return NULL;
1323 }
1324 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1325 }
1326 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001328
1329 if (delobj != NULL) {
1330 if (PyString_Check(delobj)) {
1331 del_table = PyString_AS_STRING(delobj);
1332 dellen = PyString_GET_SIZE(delobj);
1333 }
1334 else if (PyUnicode_Check(delobj)) {
1335 PyErr_SetString(PyExc_TypeError,
1336 "deletions are implemented differently for unicode");
1337 return NULL;
1338 }
1339 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1340 return NULL;
1341
1342 if (tablen != 256) {
1343 PyErr_SetString(PyExc_ValueError,
1344 "translation table must be 256 characters long");
1345 return NULL;
1346 }
1347 }
1348 else {
1349 del_table = NULL;
1350 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001351 }
1352
1353 table = table1;
1354 inlen = PyString_Size(input_obj);
1355 result = PyString_FromStringAndSize((char *)NULL, inlen);
1356 if (result == NULL)
1357 return NULL;
1358 output_start = output = PyString_AsString(result);
1359 input = PyString_AsString(input_obj);
1360
1361 if (dellen == 0) {
1362 /* If no deletions are required, use faster code */
1363 for (i = inlen; --i >= 0; ) {
1364 c = Py_CHARMASK(*input++);
1365 if (Py_CHARMASK((*output++ = table[c])) != c)
1366 changed = 1;
1367 }
1368 if (changed)
1369 return result;
1370 Py_DECREF(result);
1371 Py_INCREF(input_obj);
1372 return input_obj;
1373 }
1374
1375 for (i = 0; i < 256; i++)
1376 trans_table[i] = Py_CHARMASK(table[i]);
1377
1378 for (i = 0; i < dellen; i++)
1379 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1380
1381 for (i = inlen; --i >= 0; ) {
1382 c = Py_CHARMASK(*input++);
1383 if (trans_table[c] != -1)
1384 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1385 continue;
1386 changed = 1;
1387 }
1388 if (!changed) {
1389 Py_DECREF(result);
1390 Py_INCREF(input_obj);
1391 return input_obj;
1392 }
1393 /* Fix the size of the resulting string */
1394 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1395 return NULL;
1396 return result;
1397}
1398
1399
1400/* What follows is used for implementing replace(). Perry Stoll. */
1401
/*
  mymemfind

  memory-block counterpart of strstr.

  Scans the LEN bytes at MEM from left to right for the PAT_LEN bytes at
  PAT and returns the offset of the first place they occur, or -1 if
  they do not occur.  When PAT_LEN is greater than LEN no position is
  examined and the result is -1.
*/
static int
mymemfind(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    /* highest offset at which a full pattern still fits */
    int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] &&
            memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1431
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, searching left
   to right with mymemfind and resuming right after each match:
       mem=1111  and pat==11 gives 2
       mem=11111 and pat==11 also gives 2
 */
static int
mymemcnt(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        int consumed;
        int pos = mymemfind(mem, len, pat, pat_len);

        if (pos == -1)
            break;
        /* drop everything up to and including this match */
        consumed = pos + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
1459
/*
   mymemreplace

   Return a string in which the occurrences of PAT in memory STR are
   replaced with SUB.  A negative COUNT replaces all of them; otherwise
   at most COUNT occurrences are replaced, working from the left.

   If STR is empty, PAT is longer than STR, or nothing is to be
   replaced, then the original string is returned.  Otherwise, a new
   string is allocated here and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally with PyMem_MALLOC, or
       NULL if an error occurred (no memory, or the result length does
       not fit in an int).
*/
static char *
mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
    char *str;
    int len;        /* input string */
    char *pat;
    int pat_len;    /* pattern string to find */
    char *sub;
    int sub_len;    /* substitution string */
    int count;      /* number of replacements */
    int *out_len;

{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* growth per replacement; refuse results too long for an int */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound*delta;

    /* new_len is 0 when every byte of STR is replaced by an empty SUB.
     * A zero-byte request may be answered with NULL by the underlying
     * allocator, which the caller would take for an out-of-memory
     * error, so always ask for at least one byte.
     */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL)
        return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found, break out of loop */
        if (offset == -1)
            break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset);  /* copy part of str before pat */
        str += offset + pat_len;     /* move str past pattern */
        len -= offset + pat_len;     /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset;             /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len;            /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0)
            break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return str;
}
1542
1543
1544static char replace__doc__[] =
1545"S.replace (old, new[, maxsplit]) -> string\n\
1546\n\
1547Return a copy of string S with all occurrences of substring\n\
1548old replaced by new. If the optional argument maxsplit is\n\
1549given, only the first maxsplit occurrences are replaced.";
1550
1551static PyObject *
1552string_replace(self, args)
1553 PyStringObject *self;
1554 PyObject *args;
1555{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001556 const char *str = PyString_AS_STRING(self), *sub, *repl;
1557 char *new_s;
1558 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1559 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001560 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001561 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001562
Guido van Rossum4c08d552000-03-10 22:55:18 +00001563 if (!PyArg_ParseTuple(args, "OO|i:replace",
1564 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001566
1567 if (PyString_Check(subobj)) {
1568 sub = PyString_AS_STRING(subobj);
1569 sub_len = PyString_GET_SIZE(subobj);
1570 }
1571 else if (PyUnicode_Check(subobj))
1572 return PyUnicode_Replace((PyObject *)self,
1573 subobj, replobj, count);
1574 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1575 return NULL;
1576
1577 if (PyString_Check(replobj)) {
1578 repl = PyString_AS_STRING(replobj);
1579 repl_len = PyString_GET_SIZE(replobj);
1580 }
1581 else if (PyUnicode_Check(replobj))
1582 return PyUnicode_Replace((PyObject *)self,
1583 subobj, replobj, count);
1584 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1585 return NULL;
1586
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001587 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001588 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589 return NULL;
1590 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001591 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592 if (new_s == NULL) {
1593 PyErr_NoMemory();
1594 return NULL;
1595 }
1596 if (out_len == -1) {
1597 /* we're returning another reference to self */
1598 new = (PyObject*)self;
1599 Py_INCREF(new);
1600 }
1601 else {
1602 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001603 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604 }
1605 return new;
1606}
1607
1608
1609static char startswith__doc__[] =
1610"S.startswith(prefix[, start[, end]]) -> int\n\
1611\n\
1612Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1613optional start, test S beginning at that position. With optional end, stop\n\
1614comparing S at that position.";
1615
1616static PyObject *
1617string_startswith(self, args)
1618 PyStringObject *self;
1619 PyObject *args;
1620{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001623 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624 int plen;
1625 int start = 0;
1626 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001627 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628
Guido van Rossumc6821402000-05-08 14:08:05 +00001629 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1630 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631 return NULL;
1632 if (PyString_Check(subobj)) {
1633 prefix = PyString_AS_STRING(subobj);
1634 plen = PyString_GET_SIZE(subobj);
1635 }
1636 else if (PyUnicode_Check(subobj))
1637 return PyInt_FromLong(
1638 PyUnicode_Tailmatch((PyObject *)self,
1639 subobj, start, end, -1));
1640 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 return NULL;
1642
1643 /* adopt Java semantics for index out of range. it is legal for
1644 * offset to be == plen, but this only returns true if prefix is
1645 * the empty string.
1646 */
1647 if (start < 0 || start+plen > len)
1648 return PyInt_FromLong(0);
1649
1650 if (!memcmp(str+start, prefix, plen)) {
1651 /* did the match end after the specified end? */
1652 if (end < 0)
1653 return PyInt_FromLong(1);
1654 else if (end - start < plen)
1655 return PyInt_FromLong(0);
1656 else
1657 return PyInt_FromLong(1);
1658 }
1659 else return PyInt_FromLong(0);
1660}
1661
1662
1663static char endswith__doc__[] =
1664"S.endswith(suffix[, start[, end]]) -> int\n\
1665\n\
1666Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1667optional start, test S beginning at that position. With optional end, stop\n\
1668comparing S at that position.";
1669
1670static PyObject *
1671string_endswith(self, args)
1672 PyStringObject *self;
1673 PyObject *args;
1674{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001675 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001676 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 const char* suffix;
1678 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001679 int start = 0;
1680 int end = -1;
1681 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001682 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001683
Guido van Rossumc6821402000-05-08 14:08:05 +00001684 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1685 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001686 return NULL;
1687 if (PyString_Check(subobj)) {
1688 suffix = PyString_AS_STRING(subobj);
1689 slen = PyString_GET_SIZE(subobj);
1690 }
1691 else if (PyUnicode_Check(subobj))
1692 return PyInt_FromLong(
1693 PyUnicode_Tailmatch((PyObject *)self,
1694 subobj, start, end, +1));
1695 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696 return NULL;
1697
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699 return PyInt_FromLong(0);
1700
1701 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001702 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703
Guido van Rossum4c08d552000-03-10 22:55:18 +00001704 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705 return PyInt_FromLong(1);
1706 else return PyInt_FromLong(0);
1707}
1708
1709
Guido van Rossum4c08d552000-03-10 22:55:18 +00001710static char expandtabs__doc__[] =
1711"S.expandtabs([tabsize]) -> string\n\
1712\n\
1713Return a copy of S where all tab characters are expanded using spaces.\n\
1714If tabsize is not given, a tab size of 8 characters is assumed.";
1715
1716static PyObject*
1717string_expandtabs(PyStringObject *self, PyObject *args)
1718{
1719 const char *e, *p;
1720 char *q;
1721 int i, j;
1722 PyObject *u;
1723 int tabsize = 8;
1724
1725 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1726 return NULL;
1727
1728 /* First pass: determine size of ouput string */
1729 i = j = 0;
1730 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1731 for (p = PyString_AS_STRING(self); p < e; p++)
1732 if (*p == '\t') {
1733 if (tabsize > 0)
1734 j += tabsize - (j % tabsize);
1735 }
1736 else {
1737 j++;
1738 if (*p == '\n' || *p == '\r') {
1739 i += j;
1740 j = 0;
1741 }
1742 }
1743
1744 /* Second pass: create output string and fill it */
1745 u = PyString_FromStringAndSize(NULL, i + j);
1746 if (!u)
1747 return NULL;
1748
1749 j = 0;
1750 q = PyString_AS_STRING(u);
1751
1752 for (p = PyString_AS_STRING(self); p < e; p++)
1753 if (*p == '\t') {
1754 if (tabsize > 0) {
1755 i = tabsize - (j % tabsize);
1756 j += i;
1757 while (i--)
1758 *q++ = ' ';
1759 }
1760 }
1761 else {
1762 j++;
1763 *q++ = *p;
1764 if (*p == '\n' || *p == '\r')
1765 j = 0;
1766 }
1767
1768 return u;
1769}
1770
1771static
1772PyObject *pad(PyStringObject *self,
1773 int left,
1774 int right,
1775 char fill)
1776{
1777 PyObject *u;
1778
1779 if (left < 0)
1780 left = 0;
1781 if (right < 0)
1782 right = 0;
1783
1784 if (left == 0 && right == 0) {
1785 Py_INCREF(self);
1786 return (PyObject *)self;
1787 }
1788
1789 u = PyString_FromStringAndSize(NULL,
1790 left + PyString_GET_SIZE(self) + right);
1791 if (u) {
1792 if (left)
1793 memset(PyString_AS_STRING(u), fill, left);
1794 memcpy(PyString_AS_STRING(u) + left,
1795 PyString_AS_STRING(self),
1796 PyString_GET_SIZE(self));
1797 if (right)
1798 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1799 fill, right);
1800 }
1801
1802 return u;
1803}
1804
1805static char ljust__doc__[] =
1806"S.ljust(width) -> string\n\
1807\n\
1808Return S left justified in a string of length width. Padding is\n\
1809done using spaces.";
1810
1811static PyObject *
1812string_ljust(PyStringObject *self, PyObject *args)
1813{
1814 int width;
1815 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1816 return NULL;
1817
1818 if (PyString_GET_SIZE(self) >= width) {
1819 Py_INCREF(self);
1820 return (PyObject*) self;
1821 }
1822
1823 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1824}
1825
1826
1827static char rjust__doc__[] =
1828"S.rjust(width) -> string\n\
1829\n\
1830Return S right justified in a string of length width. Padding is\n\
1831done using spaces.";
1832
1833static PyObject *
1834string_rjust(PyStringObject *self, PyObject *args)
1835{
1836 int width;
1837 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1838 return NULL;
1839
1840 if (PyString_GET_SIZE(self) >= width) {
1841 Py_INCREF(self);
1842 return (PyObject*) self;
1843 }
1844
1845 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1846}
1847
1848
1849static char center__doc__[] =
1850"S.center(width) -> string\n\
1851\n\
1852Return S centered in a string of length width. Padding is done\n\
1853using spaces.";
1854
1855static PyObject *
1856string_center(PyStringObject *self, PyObject *args)
1857{
1858 int marg, left;
1859 int width;
1860
1861 if (!PyArg_ParseTuple(args, "i:center", &width))
1862 return NULL;
1863
1864 if (PyString_GET_SIZE(self) >= width) {
1865 Py_INCREF(self);
1866 return (PyObject*) self;
1867 }
1868
1869 marg = width - PyString_GET_SIZE(self);
1870 left = marg / 2 + (marg & width & 1);
1871
1872 return pad(self, left, marg - left, ' ');
1873}
1874
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad with '0' up to width characters, keeping a
   leading '+' or '-' in front of the inserted zeros.  (Currently
   compiled out.) */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	PyObject *padded;
	char *buf;
	int width;
	int nzeros;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	nzeros = width - PyString_GET_SIZE(self);

	padded = pad(self, nzeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[nzeros] is the first character of the original string. */
	buf = PyString_AS_STRING(padded);
	if (buf[nzeros] == '+' || buf[nzeros] == '-') {
		/* move sign to beginning of string */
		buf[0] = buf[nzeros];
		buf[nzeros] = '0';
	}

	return padded;
}
#endif
1914
1915static char isspace__doc__[] =
1916"S.isspace() -> int\n\
1917\n\
1918Return 1 if there are only whitespace characters in S,\n\
19190 otherwise.";
1920
1921static PyObject*
1922string_isspace(PyStringObject *self, PyObject *args)
1923{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001924 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1925 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001926
1927 if (!PyArg_NoArgs(args))
1928 return NULL;
1929
1930 /* Shortcut for single character strings */
1931 if (PyString_GET_SIZE(self) == 1 &&
1932 isspace(*p))
1933 return PyInt_FromLong(1);
1934
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001935 /* Special case for empty strings */
1936 if (PyString_GET_SIZE(self) == 0)
1937 return PyInt_FromLong(0);
1938
Guido van Rossum4c08d552000-03-10 22:55:18 +00001939 e = p + PyString_GET_SIZE(self);
1940 for (; p < e; p++) {
1941 if (!isspace(*p))
1942 return PyInt_FromLong(0);
1943 }
1944 return PyInt_FromLong(1);
1945}
1946
1947
1948static char isdigit__doc__[] =
1949"S.isdigit() -> int\n\
1950\n\
1951Return 1 if there are only digit characters in S,\n\
19520 otherwise.";
1953
1954static PyObject*
1955string_isdigit(PyStringObject *self, PyObject *args)
1956{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001957 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1958 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001959
1960 if (!PyArg_NoArgs(args))
1961 return NULL;
1962
1963 /* Shortcut for single character strings */
1964 if (PyString_GET_SIZE(self) == 1 &&
1965 isdigit(*p))
1966 return PyInt_FromLong(1);
1967
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001968 /* Special case for empty strings */
1969 if (PyString_GET_SIZE(self) == 0)
1970 return PyInt_FromLong(0);
1971
Guido van Rossum4c08d552000-03-10 22:55:18 +00001972 e = p + PyString_GET_SIZE(self);
1973 for (; p < e; p++) {
1974 if (!isdigit(*p))
1975 return PyInt_FromLong(0);
1976 }
1977 return PyInt_FromLong(1);
1978}
1979
1980
1981static char islower__doc__[] =
1982"S.islower() -> int\n\
1983\n\
1984Return 1 if all cased characters in S are lowercase and there is\n\
1985at least one cased character in S, 0 otherwise.";
1986
1987static PyObject*
1988string_islower(PyStringObject *self, PyObject *args)
1989{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001990 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1991 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001992 int cased;
1993
1994 if (!PyArg_NoArgs(args))
1995 return NULL;
1996
1997 /* Shortcut for single character strings */
1998 if (PyString_GET_SIZE(self) == 1)
1999 return PyInt_FromLong(islower(*p) != 0);
2000
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002001 /* Special case for empty strings */
2002 if (PyString_GET_SIZE(self) == 0)
2003 return PyInt_FromLong(0);
2004
Guido van Rossum4c08d552000-03-10 22:55:18 +00002005 e = p + PyString_GET_SIZE(self);
2006 cased = 0;
2007 for (; p < e; p++) {
2008 if (isupper(*p))
2009 return PyInt_FromLong(0);
2010 else if (!cased && islower(*p))
2011 cased = 1;
2012 }
2013 return PyInt_FromLong(cased);
2014}
2015
2016
2017static char isupper__doc__[] =
2018"S.isupper() -> int\n\
2019\n\
2020Return 1 if all cased characters in S are uppercase and there is\n\
2021at least one cased character in S, 0 otherwise.";
2022
2023static PyObject*
2024string_isupper(PyStringObject *self, PyObject *args)
2025{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002026 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2027 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002028 int cased;
2029
2030 if (!PyArg_NoArgs(args))
2031 return NULL;
2032
2033 /* Shortcut for single character strings */
2034 if (PyString_GET_SIZE(self) == 1)
2035 return PyInt_FromLong(isupper(*p) != 0);
2036
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002037 /* Special case for empty strings */
2038 if (PyString_GET_SIZE(self) == 0)
2039 return PyInt_FromLong(0);
2040
Guido van Rossum4c08d552000-03-10 22:55:18 +00002041 e = p + PyString_GET_SIZE(self);
2042 cased = 0;
2043 for (; p < e; p++) {
2044 if (islower(*p))
2045 return PyInt_FromLong(0);
2046 else if (!cased && isupper(*p))
2047 cased = 1;
2048 }
2049 return PyInt_FromLong(cased);
2050}
2051
2052
2053static char istitle__doc__[] =
2054"S.istitle() -> int\n\
2055\n\
2056Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2057may only follow uncased characters and lowercase characters only cased\n\
2058ones. Return 0 otherwise.";
2059
2060static PyObject*
2061string_istitle(PyStringObject *self, PyObject *args)
2062{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002063 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2064 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065 int cased, previous_is_cased;
2066
2067 if (!PyArg_NoArgs(args))
2068 return NULL;
2069
2070 /* Shortcut for single character strings */
2071 if (PyString_GET_SIZE(self) == 1)
2072 return PyInt_FromLong(isupper(*p) != 0);
2073
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002074 /* Special case for empty strings */
2075 if (PyString_GET_SIZE(self) == 0)
2076 return PyInt_FromLong(0);
2077
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 e = p + PyString_GET_SIZE(self);
2079 cased = 0;
2080 previous_is_cased = 0;
2081 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002082 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002083
2084 if (isupper(ch)) {
2085 if (previous_is_cased)
2086 return PyInt_FromLong(0);
2087 previous_is_cased = 1;
2088 cased = 1;
2089 }
2090 else if (islower(ch)) {
2091 if (!previous_is_cased)
2092 return PyInt_FromLong(0);
2093 previous_is_cased = 1;
2094 cased = 1;
2095 }
2096 else
2097 previous_is_cased = 0;
2098 }
2099 return PyInt_FromLong(cased);
2100}
2101
2102
2103static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002104"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002105\n\
2106Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002107Line breaks are not included in the resulting list unless keepends\n\
2108is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109
2110#define SPLIT_APPEND(data, left, right) \
2111 str = PyString_FromStringAndSize(data + left, right - left); \
2112 if (!str) \
2113 goto onError; \
2114 if (PyList_Append(list, str)) { \
2115 Py_DECREF(str); \
2116 goto onError; \
2117 } \
2118 else \
2119 Py_DECREF(str);
2120
2121static PyObject*
2122string_splitlines(PyStringObject *self, PyObject *args)
2123{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002124 register int i;
2125 register int j;
2126 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002127 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002128 PyObject *list;
2129 PyObject *str;
2130 char *data;
2131
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002132 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002133 return NULL;
2134
2135 data = PyString_AS_STRING(self);
2136 len = PyString_GET_SIZE(self);
2137
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 list = PyList_New(0);
2139 if (!list)
2140 goto onError;
2141
2142 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002143 int eol;
2144
Guido van Rossum4c08d552000-03-10 22:55:18 +00002145 /* Find a line and append it */
2146 while (i < len && data[i] != '\n' && data[i] != '\r')
2147 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002148
2149 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002150 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002151 if (i < len) {
2152 if (data[i] == '\r' && i + 1 < len &&
2153 data[i+1] == '\n')
2154 i += 2;
2155 else
2156 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002157 if (keepends)
2158 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002159 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002160 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002161 j = i;
2162 }
2163 if (j < len) {
2164 SPLIT_APPEND(data, j, len);
2165 }
2166
2167 return list;
2168
2169 onError:
2170 Py_DECREF(list);
2171 return NULL;
2172}
2173
2174#undef SPLIT_APPEND
2175
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176
2177static PyMethodDef
2178string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179 /* Counterparts of the obsolete stropmodule functions; except
2180 string.maketrans(). */
2181 {"join", (PyCFunction)string_join, 1, join__doc__},
2182 {"split", (PyCFunction)string_split, 1, split__doc__},
2183 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2184 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2185 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2186 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2187 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2188 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2189 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2191 {"count", (PyCFunction)string_count, 1, count__doc__},
2192 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2193 {"find", (PyCFunction)string_find, 1, find__doc__},
2194 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002195 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002196 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2197 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2198 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2199 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002200 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2201 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2202 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2204 {"title", (PyCFunction)string_title, 1, title__doc__},
2205 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2206 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2207 {"center", (PyCFunction)string_center, 1, center__doc__},
2208 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2209 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2210#if 0
2211 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2212#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213 {NULL, NULL} /* sentinel */
2214};
2215
2216static PyObject *
2217string_getattr(s, name)
2218 PyStringObject *s;
2219 char *name;
2220{
2221 return Py_FindMethod(string_methods, (PyObject*)s, name);
2222}
2223
2224
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002225PyTypeObject PyString_Type = {
2226 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002227 0,
2228 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002229 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002230 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002231 (destructor)string_dealloc, /*tp_dealloc*/
2232 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002234 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002235 (cmpfunc)string_compare, /*tp_compare*/
2236 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002237 0, /*tp_as_number*/
2238 &string_as_sequence, /*tp_as_sequence*/
2239 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002240 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002241 0, /*tp_call*/
2242 0, /*tp_str*/
2243 0, /*tp_getattro*/
2244 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002245 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002246 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002247 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002248};
2249
2250void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002251PyString_Concat(pv, w)
2252 register PyObject **pv;
2253 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002254{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002255 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002256 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002257 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002258 if (w == NULL || !PyString_Check(*pv)) {
2259 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002260 *pv = NULL;
2261 return;
2262 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002263 v = string_concat((PyStringObject *) *pv, w);
2264 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002265 *pv = v;
2266}
2267
Guido van Rossum013142a1994-08-30 08:19:36 +00002268void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002269PyString_ConcatAndDel(pv, w)
2270 register PyObject **pv;
2271 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002272{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002273 PyString_Concat(pv, w);
2274 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002275}
2276
2277
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002278/* The following function breaks the notion that strings are immutable:
2279 it changes the size of a string. We get away with this only if there
2280 is only one module referencing the object. You can also think of it
2281 as creating a new string object and destroying the old one, only
2282 more efficiently. In any case, don't use this if the string may
2283 already be known to some other part of the code... */
2284
2285int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002286_PyString_Resize(pv, newsize)
2287 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002288 int newsize;
2289{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002290 register PyObject *v;
2291 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002292 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002293 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002294 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002295 Py_DECREF(v);
2296 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002297 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002298 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002299 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002300#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002301 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002302#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002303 _Py_ForgetReference(v);
2304 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002305 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002306 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002307 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002308 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002309 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002310 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002311 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002312 _Py_NewReference(*pv);
2313 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002314 sv->ob_size = newsize;
2315 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002316 return 0;
2317}
Guido van Rossume5372401993-03-16 12:15:04 +00002318
2319/* Helpers for formatstring */
2320
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002321static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002322getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002323 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002324 int arglen;
2325 int *p_argidx;
2326{
2327 int argidx = *p_argidx;
2328 if (argidx < arglen) {
2329 (*p_argidx)++;
2330 if (arglen < 0)
2331 return args;
2332 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002333 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002334 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002335 PyErr_SetString(PyExc_TypeError,
2336 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002337 return NULL;
2338}
2339
/* Bit flags collected from the flag characters of a format specifier. */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
2345
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002346static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002347formatfloat(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002348 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002349 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002350 int flags;
2351 int prec;
2352 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002353 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002354{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002355 /* fmt = '%#.' + `prec` + `type`
2356 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002357 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002358 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002359 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002360 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002361 if (prec < 0)
2362 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002363 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2364 type = 'g';
2365 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002366 /* worst case length calc to ensure no buffer overrun:
2367 fmt = %#.<prec>g
2368 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2369 for any double rep.)
2370 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2371 If prec=0 the effective precision is 1 (the leading digit is
2372 always given), therefore increase by one to 10+prec. */
2373 if (buflen <= (size_t)10 + (size_t)prec) {
2374 PyErr_SetString(PyExc_OverflowError,
2375 "formatted float is too long (precision too long?)");
2376 return -1;
2377 }
Guido van Rossume5372401993-03-16 12:15:04 +00002378 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002379 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002380}
2381
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002382static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002383formatint(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002384 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002385 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002386 int flags;
2387 int prec;
2388 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002389 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002390{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002391 /* fmt = '%#.' + `prec` + 'l' + `type`
2392 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002393 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002394 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002395 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002396 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002397 if (prec < 0)
2398 prec = 1;
2399 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002400 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2401 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2402 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2403 PyErr_SetString(PyExc_OverflowError,
2404 "formatted integer is too long (precision too long?)");
2405 return -1;
2406 }
Guido van Rossume5372401993-03-16 12:15:04 +00002407 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002408 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002409}
2410
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002411static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002412formatchar(buf, buflen, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002413 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002414 size_t buflen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002415 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002416{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002417 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002418 if (PyString_Check(v)) {
2419 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002420 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002421 }
2422 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002423 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002424 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002425 }
2426 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002427 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002428}
2429
Guido van Rossum013142a1994-08-30 08:19:36 +00002430
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is parenthesized so that the macro stays a single
   operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002440
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002441PyObject *
2442PyString_Format(format, args)
2443 PyObject *format;
2444 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002445{
2446 char *fmt, *res;
2447 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002448 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002449 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002450 PyObject *dict = NULL;
2451 if (format == NULL || !PyString_Check(format) || args == NULL) {
2452 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002453 return NULL;
2454 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002455 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002456 fmt = PyString_AsString(format);
2457 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002458 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002459 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002460 if (result == NULL)
2461 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002462 res = PyString_AsString(result);
2463 if (PyTuple_Check(args)) {
2464 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002465 argidx = 0;
2466 }
2467 else {
2468 arglen = -1;
2469 argidx = -2;
2470 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002471 if (args->ob_type->tp_as_mapping)
2472 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002473 while (--fmtcnt >= 0) {
2474 if (*fmt != '%') {
2475 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002476 rescnt = fmtcnt + 100;
2477 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002478 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002479 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002480 res = PyString_AsString(result)
2481 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002482 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002483 }
2484 *res++ = *fmt++;
2485 }
2486 else {
2487 /* Got a format specifier */
2488 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002489 int width = -1;
2490 int prec = -1;
2491 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002492 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002493 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002494 PyObject *v = NULL;
2495 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002496 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002497 int sign;
2498 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002499 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002500 char *fmt_start = fmt;
2501
Guido van Rossumda9c2711996-12-05 21:58:58 +00002502 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002503 if (*fmt == '(') {
2504 char *keystart;
2505 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002506 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002507 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002508
2509 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002510 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002511 "format requires a mapping");
2512 goto error;
2513 }
2514 ++fmt;
2515 --fmtcnt;
2516 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002517 /* Skip over balanced parentheses */
2518 while (pcount > 0 && --fmtcnt >= 0) {
2519 if (*fmt == ')')
2520 --pcount;
2521 else if (*fmt == '(')
2522 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002523 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002524 }
2525 keylen = fmt - keystart - 1;
2526 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002527 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002528 "incomplete format key");
2529 goto error;
2530 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002531 key = PyString_FromStringAndSize(keystart,
2532 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002533 if (key == NULL)
2534 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002535 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002536 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002537 args_owned = 0;
2538 }
2539 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002540 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002541 if (args == NULL) {
2542 goto error;
2543 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002544 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002545 arglen = -1;
2546 argidx = -2;
2547 }
Guido van Rossume5372401993-03-16 12:15:04 +00002548 while (--fmtcnt >= 0) {
2549 switch (c = *fmt++) {
2550 case '-': flags |= F_LJUST; continue;
2551 case '+': flags |= F_SIGN; continue;
2552 case ' ': flags |= F_BLANK; continue;
2553 case '#': flags |= F_ALT; continue;
2554 case '0': flags |= F_ZERO; continue;
2555 }
2556 break;
2557 }
2558 if (c == '*') {
2559 v = getnextarg(args, arglen, &argidx);
2560 if (v == NULL)
2561 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002562 if (!PyInt_Check(v)) {
2563 PyErr_SetString(PyExc_TypeError,
2564 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002565 goto error;
2566 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002567 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002568 if (width < 0) {
2569 flags |= F_LJUST;
2570 width = -width;
2571 }
Guido van Rossume5372401993-03-16 12:15:04 +00002572 if (--fmtcnt >= 0)
2573 c = *fmt++;
2574 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002575 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002576 width = c - '0';
2577 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002578 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002579 if (!isdigit(c))
2580 break;
2581 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002582 PyErr_SetString(
2583 PyExc_ValueError,
2584 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002585 goto error;
2586 }
2587 width = width*10 + (c - '0');
2588 }
2589 }
2590 if (c == '.') {
2591 prec = 0;
2592 if (--fmtcnt >= 0)
2593 c = *fmt++;
2594 if (c == '*') {
2595 v = getnextarg(args, arglen, &argidx);
2596 if (v == NULL)
2597 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002598 if (!PyInt_Check(v)) {
2599 PyErr_SetString(
2600 PyExc_TypeError,
2601 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002602 goto error;
2603 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002604 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002605 if (prec < 0)
2606 prec = 0;
2607 if (--fmtcnt >= 0)
2608 c = *fmt++;
2609 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002610 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002611 prec = c - '0';
2612 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002613 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002614 if (!isdigit(c))
2615 break;
2616 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002617 PyErr_SetString(
2618 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002619 "prec too big");
2620 goto error;
2621 }
2622 prec = prec*10 + (c - '0');
2623 }
2624 }
2625 } /* prec */
2626 if (fmtcnt >= 0) {
2627 if (c == 'h' || c == 'l' || c == 'L') {
2628 size = c;
2629 if (--fmtcnt >= 0)
2630 c = *fmt++;
2631 }
2632 }
2633 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002634 PyErr_SetString(PyExc_ValueError,
2635 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002636 goto error;
2637 }
2638 if (c != '%') {
2639 v = getnextarg(args, arglen, &argidx);
2640 if (v == NULL)
2641 goto error;
2642 }
2643 sign = 0;
2644 fill = ' ';
2645 switch (c) {
2646 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002647 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002648 len = 1;
2649 break;
2650 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002651 case 'r':
2652 if (PyUnicode_Check(v)) {
2653 fmt = fmt_start;
2654 goto unicode;
2655 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002656 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002657 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002658 else
2659 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002660 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002661 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002662 if (!PyString_Check(temp)) {
2663 PyErr_SetString(PyExc_TypeError,
2664 "%s argument has non-string str()");
2665 goto error;
2666 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002667 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002668 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002669 if (prec >= 0 && len > prec)
2670 len = prec;
2671 break;
2672 case 'i':
2673 case 'd':
2674 case 'u':
2675 case 'o':
2676 case 'x':
2677 case 'X':
2678 if (c == 'i')
2679 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002680 pbuf = formatbuf;
2681 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002682 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002683 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002684 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002685 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002686 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002687 if ((flags&F_ALT) &&
2688 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002689 pbuf[0] == '0' && pbuf[1] == c) {
2690 *res++ = *pbuf++;
2691 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002692 rescnt -= 2;
2693 len -= 2;
2694 width -= 2;
2695 if (width < 0)
2696 width = 0;
2697 }
2698 }
Guido van Rossume5372401993-03-16 12:15:04 +00002699 break;
2700 case 'e':
2701 case 'E':
2702 case 'f':
2703 case 'g':
2704 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002705 pbuf = formatbuf;
2706 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002707 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002708 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002709 sign = 1;
2710 if (flags&F_ZERO)
2711 fill = '0';
2712 break;
2713 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002714 pbuf = formatbuf;
2715 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002716 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002717 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002718 break;
2719 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002720 PyErr_Format(PyExc_ValueError,
2721 "unsupported format character '%c' (0x%x)",
2722 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002723 goto error;
2724 }
2725 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002726 if (*pbuf == '-' || *pbuf == '+') {
2727 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002728 len--;
2729 }
2730 else if (flags & F_SIGN)
2731 sign = '+';
2732 else if (flags & F_BLANK)
2733 sign = ' ';
2734 else
2735 sign = '\0';
2736 }
2737 if (width < len)
2738 width = len;
2739 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002740 reslen -= rescnt;
2741 rescnt = width + fmtcnt + 100;
2742 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002743 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002744 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002745 res = PyString_AsString(result)
2746 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002747 }
2748 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002749 if (fill != ' ')
2750 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002751 rescnt--;
2752 if (width > len)
2753 width--;
2754 }
2755 if (width > len && !(flags&F_LJUST)) {
2756 do {
2757 --rescnt;
2758 *res++ = fill;
2759 } while (--width > len);
2760 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002761 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002762 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002763 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002764 res += len;
2765 rescnt -= len;
2766 while (--width >= len) {
2767 --rescnt;
2768 *res++ = ' ';
2769 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002770 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002771 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002772 "not all arguments converted");
2773 goto error;
2774 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002775 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002776 } /* '%' */
2777 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002778 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002779 PyErr_SetString(PyExc_TypeError,
2780 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002781 goto error;
2782 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002783 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002784 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002785 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002786 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002787 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002788
2789 unicode:
2790 if (args_owned) {
2791 Py_DECREF(args);
2792 args_owned = 0;
2793 }
2794 /* Fiddle args right (remove the first argidx-1 arguments) */
2795 --argidx;
2796 if (PyTuple_Check(orig_args) && argidx > 0) {
2797 PyObject *v;
2798 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2799 v = PyTuple_New(n);
2800 if (v == NULL)
2801 goto error;
2802 while (--n >= 0) {
2803 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2804 Py_INCREF(w);
2805 PyTuple_SET_ITEM(v, n, w);
2806 }
2807 args = v;
2808 } else {
2809 Py_INCREF(orig_args);
2810 args = orig_args;
2811 }
2812 /* Paste rest of format string to what we have of the result
2813 string; we reuse result for this */
2814 rescnt = res - PyString_AS_STRING(result);
2815 fmtcnt = PyString_GET_SIZE(format) - \
2816 (fmt - PyString_AS_STRING(format));
2817 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2818 Py_DECREF(args);
2819 goto error;
2820 }
2821 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2822 format = result;
2823 /* Let Unicode do its magic */
2824 result = PyUnicode_Format(format, args);
2825 Py_DECREF(format);
2826 Py_DECREF(args);
2827 return result;
2828
Guido van Rossume5372401993-03-16 12:15:04 +00002829 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002830 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002831 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002832 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002833 }
Guido van Rossume5372401993-03-16 12:15:04 +00002834 return NULL;
2835}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002836
2837
2838#ifdef INTERN_STRINGS
2839
2840static PyObject *interned;
2841
2842void
2843PyString_InternInPlace(p)
2844 PyObject **p;
2845{
2846 register PyStringObject *s = (PyStringObject *)(*p);
2847 PyObject *t;
2848 if (s == NULL || !PyString_Check(s))
2849 Py_FatalError("PyString_InternInPlace: strings only please!");
2850 if ((t = s->ob_sinterned) != NULL) {
2851 if (t == (PyObject *)s)
2852 return;
2853 Py_INCREF(t);
2854 *p = t;
2855 Py_DECREF(s);
2856 return;
2857 }
2858 if (interned == NULL) {
2859 interned = PyDict_New();
2860 if (interned == NULL)
2861 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002862 }
2863 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2864 Py_INCREF(t);
2865 *p = s->ob_sinterned = t;
2866 Py_DECREF(s);
2867 return;
2868 }
2869 t = (PyObject *)s;
2870 if (PyDict_SetItem(interned, t, t) == 0) {
2871 s->ob_sinterned = t;
2872 return;
2873 }
2874 PyErr_Clear();
2875}
2876
2877
2878PyObject *
2879PyString_InternFromString(cp)
2880 const char *cp;
2881{
2882 PyObject *s = PyString_FromString(cp);
2883 if (s == NULL)
2884 return NULL;
2885 PyString_InternInPlace(&s);
2886 return s;
2887}
2888
2889#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002890
2891void
2892PyString_Fini()
2893{
2894 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002895 for (i = 0; i < UCHAR_MAX + 1; i++) {
2896 Py_XDECREF(characters[i]);
2897 characters[i] = NULL;
2898 }
2899#ifndef DONT_SHARE_SHORT_STRINGS
2900 Py_XDECREF(nullstring);
2901 nullstring = NULL;
2902#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002903#ifdef INTERN_STRINGS
2904 if (interned) {
2905 int pos, changed;
2906 PyObject *key, *value;
2907 do {
2908 changed = 0;
2909 pos = 0;
2910 while (PyDict_Next(interned, &pos, &key, &value)) {
2911 if (key->ob_refcnt == 2 && key == value) {
2912 PyDict_DelItem(interned, key);
2913 changed = 1;
2914 }
2915 }
2916 } while (changed);
2917 }
2918#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002919}