blob: 288f26e22081608e0d1e342771a91f8f273edf32 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum71160aa1997-06-03 18:03:18 +000036#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000037#include <ctype.h>
38
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000039#ifdef COUNT_ALLOCS
40int null_strings, one_strings;
41#endif
42
Guido van Rossum03093a21994-09-28 15:51:32 +000043#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000044#include <limits.h>
45#else
46#ifndef UCHAR_MAX
47#define UCHAR_MAX 255
48#endif
49#endif
50
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000052#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071PyObject *
72PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000073 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074 int size;
75{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000077#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
92 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000095
96 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000098 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000099 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000101 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102#ifdef CACHE_HASH
103 op->ob_shash = -1;
104#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000105#ifdef INTERN_STRINGS
106 op->ob_sinterned = NULL;
107#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000108 if (str != NULL)
109 memcpy(op->ob_sval, str, size);
110 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0) {
113 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 } else if (size == 1 && str != NULL) {
116 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121}
122
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123PyObject *
124PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000125 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126{
127 register unsigned int size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000129#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 if (size == 0 && (op = nullstring) != NULL) {
131#ifdef COUNT_ALLOCS
132 null_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138#ifdef COUNT_ALLOCS
139 one_strings++;
140#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000144#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145
146 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000148 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000149 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000151 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152#ifdef CACHE_HASH
153 op->ob_shash = -1;
154#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000155#ifdef INTERN_STRINGS
156 op->ob_sinterned = NULL;
157#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000158 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000159#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 if (size == 0) {
161 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000162 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000163 } else if (size == 1) {
164 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000165 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000166 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000167#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000168 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169}
170
Guido van Rossum234f9421993-06-17 12:35:49 +0000171static void
Guido van Rossume5372401993-03-16 12:15:04 +0000172string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000173 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000174{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000175 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000176}
177
Guido van Rossumd7047b31995-01-02 19:07:15 +0000178int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000179PyString_Size(op)
180 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000182 if (!PyString_Check(op)) {
183 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184 return -1;
185 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000186 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000187}
188
189/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000190PyString_AsString(op)
191 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000193 if (!PyString_Check(op)) {
194 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 return NULL;
196 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000197 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198}
199
200/* Methods */
201
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000202static int
Guido van Rossume5372401993-03-16 12:15:04 +0000203string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000204 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 FILE *fp;
206 int flags;
207{
208 int i;
209 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000210 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000212 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000214 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216
217 /* figure out which quote to use; single is prefered */
218 quote = '\'';
219 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
220 quote = '"';
221
222 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 for (i = 0; i < op->ob_size; i++) {
224 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 fprintf(fp, "\\%c", c);
227 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000228 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000232 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000233 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234}
235
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000236static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000237string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239{
240 /* XXX overflow? */
241 int newsize = 2 + 4 * op->ob_size * sizeof(char);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000242 PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000244 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000245 }
246 else {
247 register int i;
248 register char c;
249 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000250 int quote;
251
252 /* figure out which quote to use; single is prefered */
253 quote = '\'';
254 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
255 quote = '"';
256
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000257 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 for (i = 0; i < op->ob_size; i++) {
260 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 *p++ = '\\', *p++ = c;
263 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000264 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 while (*p != '\0')
266 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 }
268 else
269 *p++ = c;
270 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000271 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000273 _PyString_Resize(
274 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000275 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277}
278
279static int
Guido van Rossume5372401993-03-16 12:15:04 +0000280string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282{
283 return a->ob_size;
284}
285
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000286static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000287string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000288 register PyStringObject *a;
289 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290{
291 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 register PyStringObject *op;
293 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000294 if (PyUnicode_Check(bb))
295 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000296 PyErr_BadArgument();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297 return NULL;
298 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000299#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 /* Optimize cases with empty left or right operand */
301 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000302 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303 return bb;
304 }
305 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000306 Py_INCREF(a);
307 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000308 }
309 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000310 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000311 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000312 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000313 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000314 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000315 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000316#ifdef CACHE_HASH
317 op->ob_shash = -1;
318#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000319#ifdef INTERN_STRINGS
320 op->ob_sinterned = NULL;
321#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000322 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
323 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
324 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326#undef b
327}
328
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000329static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000330string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000331 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000332 register int n;
333{
334 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000335 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000336 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 if (n < 0)
338 n = 0;
339 size = a->ob_size * n;
340 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000341 Py_INCREF(a);
342 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000344 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000346 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000347 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000348 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000349 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000350#ifdef CACHE_HASH
351 op->ob_shash = -1;
352#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000353#ifdef INTERN_STRINGS
354 op->ob_sinterned = NULL;
355#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000356 for (i = 0; i < size; i += a->ob_size)
357 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
358 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000359 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000360}
361
362/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
363
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000364static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000365string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000367 register int i, j; /* May be negative! */
368{
369 if (i < 0)
370 i = 0;
371 if (j < 0)
372 j = 0; /* Avoid signed/unsigned bug in next line */
373 if (j > a->ob_size)
374 j = a->ob_size;
375 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000376 Py_INCREF(a);
377 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000378 }
379 if (j < i)
380 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000381 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382}
383
Guido van Rossum9284a572000-03-07 15:53:43 +0000384static int
385string_contains(a, el)
386PyObject *a, *el;
387{
388 register char *s, *end;
389 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000390 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000391 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000392 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000393 PyErr_SetString(PyExc_TypeError,
394 "string member test needs char left operand");
395 return -1;
396 }
397 c = PyString_AsString(el)[0];
398 s = PyString_AsString(a);
399 end = s + PyString_Size(a);
400 while (s < end) {
401 if (c == *s++)
402 return 1;
403 }
404 return 0;
405}
406
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000407static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000408string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 register int i;
411{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000412 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000413 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000414 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 return NULL;
417 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000418 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000419 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000420#ifdef COUNT_ALLOCS
421 if (v != NULL)
422 one_strings++;
423#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000424 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000425 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000426 if (v == NULL)
427 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 characters[c] = (PyStringObject *) v;
429 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000430 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000431 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000432 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000433}
434
435static int
Guido van Rossume5372401993-03-16 12:15:04 +0000436string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000437 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000438{
Guido van Rossum253919f1991-02-13 23:18:39 +0000439 int len_a = a->ob_size, len_b = b->ob_size;
440 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000441 int cmp;
442 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000443 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000444 if (cmp == 0)
445 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
446 if (cmp != 0)
447 return cmp;
448 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000449 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450}
451
Guido van Rossum9bfef441993-03-29 10:43:31 +0000452static long
453string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000454 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000455{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000456 register int len;
457 register unsigned char *p;
458 register long x;
459
460#ifdef CACHE_HASH
461 if (a->ob_shash != -1)
462 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000463#ifdef INTERN_STRINGS
464 if (a->ob_sinterned != NULL)
465 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000466 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000467#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000468#endif
469 len = a->ob_size;
470 p = (unsigned char *) a->ob_sval;
471 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000472 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000473 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000474 x ^= a->ob_size;
475 if (x == -1)
476 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000477#ifdef CACHE_HASH
478 a->ob_shash = x;
479#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000480 return x;
481}
482
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000483static int
484string_buffer_getreadbuf(self, index, ptr)
485 PyStringObject *self;
486 int index;
487 const void **ptr;
488{
489 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000490 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000491 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000492 return -1;
493 }
494 *ptr = (void *)self->ob_sval;
495 return self->ob_size;
496}
497
498static int
499string_buffer_getwritebuf(self, index, ptr)
500 PyStringObject *self;
501 int index;
502 const void **ptr;
503{
Guido van Rossum045e6881997-09-08 18:30:11 +0000504 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000505 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000506 return -1;
507}
508
509static int
510string_buffer_getsegcount(self, lenp)
511 PyStringObject *self;
512 int *lenp;
513{
514 if ( lenp )
515 *lenp = self->ob_size;
516 return 1;
517}
518
Guido van Rossum1db70701998-10-08 02:18:52 +0000519static int
520string_buffer_getcharbuf(self, index, ptr)
521 PyStringObject *self;
522 int index;
523 const char **ptr;
524{
525 if ( index != 0 ) {
526 PyErr_SetString(PyExc_SystemError,
527 "accessing non-existent string segment");
528 return -1;
529 }
530 *ptr = self->ob_sval;
531 return self->ob_size;
532}
533
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000534static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000535 (inquiry)string_length, /*sq_length*/
536 (binaryfunc)string_concat, /*sq_concat*/
537 (intargfunc)string_repeat, /*sq_repeat*/
538 (intargfunc)string_item, /*sq_item*/
539 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000540 0, /*sq_ass_item*/
541 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000542 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000543};
544
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000545static PyBufferProcs string_as_buffer = {
546 (getreadbufferproc)string_buffer_getreadbuf,
547 (getwritebufferproc)string_buffer_getwritebuf,
548 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000549 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000550};
551
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000552
553
/* Mode selectors for the strip operations (presumably consumed by the
   strip helpers later in this file -- not visible here). */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
557
558
559static PyObject *
560split_whitespace(s, len, maxsplit)
561 char *s;
562 int len;
563 int maxsplit;
564{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000565 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000566 PyObject* item;
567 PyObject *list = PyList_New(0);
568
569 if (list == NULL)
570 return NULL;
571
Guido van Rossum4c08d552000-03-10 22:55:18 +0000572 for (i = j = 0; i < len; ) {
573 while (i < len && isspace(Py_CHARMASK(s[i])))
574 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000575 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000576 while (i < len && !isspace(Py_CHARMASK(s[i])))
577 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000578 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000579 if (maxsplit-- <= 0)
580 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000581 item = PyString_FromStringAndSize(s+j, (int)(i-j));
582 if (item == NULL)
583 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000584 err = PyList_Append(list, item);
585 Py_DECREF(item);
586 if (err < 0)
587 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000588 while (i < len && isspace(Py_CHARMASK(s[i])))
589 i++;
590 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000591 }
592 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000593 if (j < len) {
594 item = PyString_FromStringAndSize(s+j, (int)(len - j));
595 if (item == NULL)
596 goto finally;
597 err = PyList_Append(list, item);
598 Py_DECREF(item);
599 if (err < 0)
600 goto finally;
601 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000602 return list;
603 finally:
604 Py_DECREF(list);
605 return NULL;
606}
607
608
/* Documentation string for the string split() method. */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000616
617static PyObject *
618string_split(self, args)
619 PyStringObject *self;
620 PyObject *args;
621{
622 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000623 int maxsplit = -1;
624 const char *s = PyString_AS_STRING(self), *sub;
625 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000626
Guido van Rossum4c08d552000-03-10 22:55:18 +0000627 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000628 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000629 if (maxsplit < 0)
630 maxsplit = INT_MAX;
631 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000632 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000633 if (PyString_Check(subobj)) {
634 sub = PyString_AS_STRING(subobj);
635 n = PyString_GET_SIZE(subobj);
636 }
637 else if (PyUnicode_Check(subobj))
638 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
639 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
640 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000641 if (n == 0) {
642 PyErr_SetString(PyExc_ValueError, "empty separator");
643 return NULL;
644 }
645
646 list = PyList_New(0);
647 if (list == NULL)
648 return NULL;
649
650 i = j = 0;
651 while (i+n <= len) {
652 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000653 if (maxsplit-- <= 0)
654 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000655 item = PyString_FromStringAndSize(s+j, (int)(i-j));
656 if (item == NULL)
657 goto fail;
658 err = PyList_Append(list, item);
659 Py_DECREF(item);
660 if (err < 0)
661 goto fail;
662 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000663 }
664 else
665 i++;
666 }
667 item = PyString_FromStringAndSize(s+j, (int)(len-j));
668 if (item == NULL)
669 goto fail;
670 err = PyList_Append(list, item);
671 Py_DECREF(item);
672 if (err < 0)
673 goto fail;
674
675 return list;
676
677 fail:
678 Py_DECREF(list);
679 return NULL;
680}
681
682
/* Documentation string for the string join() method. */
static char join__doc__[] =
    "S.join(sequence) -> string\n"
    "\n"
    "Return a string which is the concatenation of the strings in the\n"
    "sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688
689static PyObject *
690string_join(self, args)
691 PyStringObject *self;
692 PyObject *args;
693{
694 char *sep = PyString_AS_STRING(self);
695 int seplen = PyString_GET_SIZE(self);
696 PyObject *res = NULL;
697 int reslen = 0;
698 char *p;
699 int seqlen = 0;
700 int sz = 100;
701 int i, slen;
702 PyObject *seq;
703
Guido van Rossum43713e52000-02-29 13:59:29 +0000704 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000705 return NULL;
706
707 seqlen = PySequence_Length(seq);
708 if (seqlen < 0 && PyErr_Occurred())
709 return NULL;
710
711 if (seqlen == 1) {
712 /* Optimization if there's only one item */
713 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000714 if (item == NULL)
715 return NULL;
716 if (!PyString_Check(item) &&
717 !PyUnicode_Check(item)) {
718 PyErr_SetString(PyExc_TypeError,
719 "first argument must be sequence of strings");
720 Py_DECREF(item);
721 return NULL;
722 }
723 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000724 }
725 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
726 return NULL;
727 p = PyString_AsString(res);
728
729 /* optimize for lists. all others (tuples and arbitrary sequences)
730 * just use the abstract interface.
731 */
732 if (PyList_Check(seq)) {
733 for (i = 0; i < seqlen; i++) {
734 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000735 if (!PyString_Check(item)){
736 if (PyUnicode_Check(item)) {
737 Py_DECREF(res);
738 return PyUnicode_Join(
739 (PyObject *)self,
740 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000741 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000742 PyErr_Format(PyExc_TypeError,
743 "sequence item %i not a string",
744 i);
745 goto finally;
746 }
747 slen = PyString_GET_SIZE(item);
748 while (reslen + slen + seplen >= sz) {
749 if (_PyString_Resize(&res, sz*2))
750 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000751 sz *= 2;
752 p = PyString_AsString(res) + reslen;
753 }
754 if (i > 0) {
755 memcpy(p, sep, seplen);
756 p += seplen;
757 reslen += seplen;
758 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000759 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000760 p += slen;
761 reslen += slen;
762 }
763 }
764 else {
765 for (i = 0; i < seqlen; i++) {
766 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000767 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000768 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000769 if (!PyString_Check(item)){
770 if (PyUnicode_Check(item)) {
771 Py_DECREF(res);
772 Py_DECREF(item);
773 return PyUnicode_Join(
774 (PyObject *)self,
775 seq);
776 }
777 Py_DECREF(item);
778 PyErr_Format(PyExc_TypeError,
779 "sequence item %i not a string",
780 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000781 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000782 }
783 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000784 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000785 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000786 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000787 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000788 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 sz *= 2;
790 p = PyString_AsString(res) + reslen;
791 }
792 if (i > 0) {
793 memcpy(p, sep, seplen);
794 p += seplen;
795 reslen += seplen;
796 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000797 memcpy(p, PyString_AS_STRING(item), slen);
798 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000799 p += slen;
800 reslen += slen;
801 }
802 }
803 if (_PyString_Resize(&res, reslen))
804 goto finally;
805 return res;
806
807 finally:
808 Py_DECREF(res);
809 return NULL;
810}
811
812
813
814static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000815string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000816 PyStringObject *self;
817 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000818 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000819{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000820 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821 int len = PyString_GET_SIZE(self);
822 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000823 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000824
Guido van Rossum4c08d552000-03-10 22:55:18 +0000825 if (!PyArg_ParseTuple(args, "O|ii:find/rfind/index/rindex",
826 &subobj, &i, &last))
827 return -2;
828 if (PyString_Check(subobj)) {
829 sub = PyString_AS_STRING(subobj);
830 n = PyString_GET_SIZE(subobj);
831 }
832 else if (PyUnicode_Check(subobj))
833 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
834 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000835 return -2;
836
837 if (last > len)
838 last = len;
839 if (last < 0)
840 last += len;
841 if (last < 0)
842 last = 0;
843 if (i < 0)
844 i += len;
845 if (i < 0)
846 i = 0;
847
Guido van Rossum4c08d552000-03-10 22:55:18 +0000848 if (dir > 0) {
849 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000850 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000851 last -= n;
852 for (; i <= last; ++i)
853 if (s[i] == sub[0] &&
854 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
855 return (long)i;
856 }
857 else {
858 int j;
859
860 if (n == 0 && i <= last)
861 return (long)last;
862 for (j = last-n; j >= i; --j)
863 if (s[j] == sub[0] &&
864 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
865 return (long)j;
866 }
867
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868 return -1;
869}
870
871
872static char find__doc__[] =
873"S.find(sub [,start [,end]]) -> int\n\
874\n\
875Return the lowest index in S where substring sub is found,\n\
876such that sub is contained within s[start,end]. Optional\n\
877arguments start and end are interpreted as in slice notation.\n\
878\n\
879Return -1 on failure.";
880
881static PyObject *
882string_find(self, args)
883 PyStringObject *self;
884 PyObject *args;
885{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000886 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000887 if (result == -2)
888 return NULL;
889 return PyInt_FromLong(result);
890}
891
892
893static char index__doc__[] =
894"S.index(sub [,start [,end]]) -> int\n\
895\n\
896Like S.find() but raise ValueError when the substring is not found.";
897
898static PyObject *
899string_index(self, args)
900 PyStringObject *self;
901 PyObject *args;
902{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000903 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000904 if (result == -2)
905 return NULL;
906 if (result == -1) {
907 PyErr_SetString(PyExc_ValueError,
908 "substring not found in string.index");
909 return NULL;
910 }
911 return PyInt_FromLong(result);
912}
913
914
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000915static char rfind__doc__[] =
916"S.rfind(sub [,start [,end]]) -> int\n\
917\n\
918Return the highest index in S where substring sub is found,\n\
919such that sub is contained within s[start,end]. Optional\n\
920arguments start and end are interpreted as in slice notation.\n\
921\n\
922Return -1 on failure.";
923
924static PyObject *
925string_rfind(self, args)
926 PyStringObject *self;
927 PyObject *args;
928{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000929 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000930 if (result == -2)
931 return NULL;
932 return PyInt_FromLong(result);
933}
934
935
936static char rindex__doc__[] =
937"S.rindex(sub [,start [,end]]) -> int\n\
938\n\
939Like S.rfind() but raise ValueError when the substring is not found.";
940
941static PyObject *
942string_rindex(self, args)
943 PyStringObject *self;
944 PyObject *args;
945{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000946 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000947 if (result == -2)
948 return NULL;
949 if (result == -1) {
950 PyErr_SetString(PyExc_ValueError,
951 "substring not found in string.rindex");
952 return NULL;
953 }
954 return PyInt_FromLong(result);
955}
956
957
958static PyObject *
959do_strip(self, args, striptype)
960 PyStringObject *self;
961 PyObject *args;
962 int striptype;
963{
964 char *s = PyString_AS_STRING(self);
965 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000966
Guido van Rossum43713e52000-02-29 13:59:29 +0000967 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000968 return NULL;
969
970 i = 0;
971 if (striptype != RIGHTSTRIP) {
972 while (i < len && isspace(Py_CHARMASK(s[i]))) {
973 i++;
974 }
975 }
976
977 j = len;
978 if (striptype != LEFTSTRIP) {
979 do {
980 j--;
981 } while (j >= i && isspace(Py_CHARMASK(s[j])));
982 j++;
983 }
984
985 if (i == 0 && j == len) {
986 Py_INCREF(self);
987 return (PyObject*)self;
988 }
989 else
990 return PyString_FromStringAndSize(s+i, j-i);
991}
992
993
994static char strip__doc__[] =
995"S.strip() -> string\n\
996\n\
997Return a copy of the string S with leading and trailing\n\
998whitespace removed.";
999
1000static PyObject *
1001string_strip(self, args)
1002 PyStringObject *self;
1003 PyObject *args;
1004{
1005 return do_strip(self, args, BOTHSTRIP);
1006}
1007
1008
1009static char lstrip__doc__[] =
1010"S.lstrip() -> string\n\
1011\n\
1012Return a copy of the string S with leading whitespace removed.";
1013
1014static PyObject *
1015string_lstrip(self, args)
1016 PyStringObject *self;
1017 PyObject *args;
1018{
1019 return do_strip(self, args, LEFTSTRIP);
1020}
1021
1022
1023static char rstrip__doc__[] =
1024"S.rstrip() -> string\n\
1025\n\
1026Return a copy of the string S with trailing whitespace removed.";
1027
1028static PyObject *
1029string_rstrip(self, args)
1030 PyStringObject *self;
1031 PyObject *args;
1032{
1033 return do_strip(self, args, RIGHTSTRIP);
1034}
1035
1036
1037static char lower__doc__[] =
1038"S.lower() -> string\n\
1039\n\
1040Return a copy of the string S converted to lowercase.";
1041
1042static PyObject *
1043string_lower(self, args)
1044 PyStringObject *self;
1045 PyObject *args;
1046{
1047 char *s = PyString_AS_STRING(self), *s_new;
1048 int i, n = PyString_GET_SIZE(self);
1049 PyObject *new;
1050
Guido van Rossum43713e52000-02-29 13:59:29 +00001051 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 return NULL;
1053 new = PyString_FromStringAndSize(NULL, n);
1054 if (new == NULL)
1055 return NULL;
1056 s_new = PyString_AsString(new);
1057 for (i = 0; i < n; i++) {
1058 int c = Py_CHARMASK(*s++);
1059 if (isupper(c)) {
1060 *s_new = tolower(c);
1061 } else
1062 *s_new = c;
1063 s_new++;
1064 }
1065 return new;
1066}
1067
1068
1069static char upper__doc__[] =
1070"S.upper() -> string\n\
1071\n\
1072Return a copy of the string S converted to uppercase.";
1073
1074static PyObject *
1075string_upper(self, args)
1076 PyStringObject *self;
1077 PyObject *args;
1078{
1079 char *s = PyString_AS_STRING(self), *s_new;
1080 int i, n = PyString_GET_SIZE(self);
1081 PyObject *new;
1082
Guido van Rossum43713e52000-02-29 13:59:29 +00001083 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001084 return NULL;
1085 new = PyString_FromStringAndSize(NULL, n);
1086 if (new == NULL)
1087 return NULL;
1088 s_new = PyString_AsString(new);
1089 for (i = 0; i < n; i++) {
1090 int c = Py_CHARMASK(*s++);
1091 if (islower(c)) {
1092 *s_new = toupper(c);
1093 } else
1094 *s_new = c;
1095 s_new++;
1096 }
1097 return new;
1098}
1099
1100
Guido van Rossum4c08d552000-03-10 22:55:18 +00001101static char title__doc__[] =
1102"S.title() -> string\n\
1103\n\
1104Return a titlecased version of S, i.e. words start with uppercase\n\
1105characters, all remaining cased characters have lowercase.";
1106
1107static PyObject*
1108string_title(PyUnicodeObject *self, PyObject *args)
1109{
1110 char *s = PyString_AS_STRING(self), *s_new;
1111 int i, n = PyString_GET_SIZE(self);
1112 int previous_is_cased = 0;
1113 PyObject *new;
1114
1115 if (!PyArg_ParseTuple(args, ":title"))
1116 return NULL;
1117 new = PyString_FromStringAndSize(NULL, n);
1118 if (new == NULL)
1119 return NULL;
1120 s_new = PyString_AsString(new);
1121 for (i = 0; i < n; i++) {
1122 int c = Py_CHARMASK(*s++);
1123 if (islower(c)) {
1124 if (!previous_is_cased)
1125 c = toupper(c);
1126 previous_is_cased = 1;
1127 } else if (isupper(c)) {
1128 if (previous_is_cased)
1129 c = tolower(c);
1130 previous_is_cased = 1;
1131 } else
1132 previous_is_cased = 0;
1133 *s_new++ = c;
1134 }
1135 return new;
1136}
1137
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138static char capitalize__doc__[] =
1139"S.capitalize() -> string\n\
1140\n\
1141Return a copy of the string S with only its first character\n\
1142capitalized.";
1143
1144static PyObject *
1145string_capitalize(self, args)
1146 PyStringObject *self;
1147 PyObject *args;
1148{
1149 char *s = PyString_AS_STRING(self), *s_new;
1150 int i, n = PyString_GET_SIZE(self);
1151 PyObject *new;
1152
Guido van Rossum43713e52000-02-29 13:59:29 +00001153 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154 return NULL;
1155 new = PyString_FromStringAndSize(NULL, n);
1156 if (new == NULL)
1157 return NULL;
1158 s_new = PyString_AsString(new);
1159 if (0 < n) {
1160 int c = Py_CHARMASK(*s++);
1161 if (islower(c))
1162 *s_new = toupper(c);
1163 else
1164 *s_new = c;
1165 s_new++;
1166 }
1167 for (i = 1; i < n; i++) {
1168 int c = Py_CHARMASK(*s++);
1169 if (isupper(c))
1170 *s_new = tolower(c);
1171 else
1172 *s_new = c;
1173 s_new++;
1174 }
1175 return new;
1176}
1177
1178
1179static char count__doc__[] =
1180"S.count(sub[, start[, end]]) -> int\n\
1181\n\
1182Return the number of occurrences of substring sub in string\n\
1183S[start:end]. Optional arguments start and end are\n\
1184interpreted as in slice notation.";
1185
1186static PyObject *
1187string_count(self, args)
1188 PyStringObject *self;
1189 PyObject *args;
1190{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001191 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001192 int len = PyString_GET_SIZE(self), n;
1193 int i = 0, last = INT_MAX;
1194 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001195 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001196
Guido van Rossum4c08d552000-03-10 22:55:18 +00001197 if (!PyArg_ParseTuple(args, "O|ii:count", &subobj, &i, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001198 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001199 if (PyString_Check(subobj)) {
1200 sub = PyString_AS_STRING(subobj);
1201 n = PyString_GET_SIZE(subobj);
1202 }
1203 else if (PyUnicode_Check(subobj))
1204 return PyInt_FromLong(
1205 PyUnicode_Count((PyObject *)self, subobj, i, last));
1206 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1207 return NULL;
1208
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001209 if (last > len)
1210 last = len;
1211 if (last < 0)
1212 last += len;
1213 if (last < 0)
1214 last = 0;
1215 if (i < 0)
1216 i += len;
1217 if (i < 0)
1218 i = 0;
1219 m = last + 1 - n;
1220 if (n == 0)
1221 return PyInt_FromLong((long) (m-i));
1222
1223 r = 0;
1224 while (i < m) {
1225 if (!memcmp(s+i, sub, n)) {
1226 r++;
1227 i += n;
1228 } else {
1229 i++;
1230 }
1231 }
1232 return PyInt_FromLong((long) r);
1233}
1234
1235
1236static char swapcase__doc__[] =
1237"S.swapcase() -> string\n\
1238\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001239Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001240converted to lowercase and vice versa.";
1241
1242static PyObject *
1243string_swapcase(self, args)
1244 PyStringObject *self;
1245 PyObject *args;
1246{
1247 char *s = PyString_AS_STRING(self), *s_new;
1248 int i, n = PyString_GET_SIZE(self);
1249 PyObject *new;
1250
Guido van Rossum43713e52000-02-29 13:59:29 +00001251 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001252 return NULL;
1253 new = PyString_FromStringAndSize(NULL, n);
1254 if (new == NULL)
1255 return NULL;
1256 s_new = PyString_AsString(new);
1257 for (i = 0; i < n; i++) {
1258 int c = Py_CHARMASK(*s++);
1259 if (islower(c)) {
1260 *s_new = toupper(c);
1261 }
1262 else if (isupper(c)) {
1263 *s_new = tolower(c);
1264 }
1265 else
1266 *s_new = c;
1267 s_new++;
1268 }
1269 return new;
1270}
1271
1272
1273static char translate__doc__[] =
1274"S.translate(table [,deletechars]) -> string\n\
1275\n\
1276Return a copy of the string S, where all characters occurring\n\
1277in the optional argument deletechars are removed, and the\n\
1278remaining characters have been mapped through the given\n\
1279translation table, which must be a string of length 256.";
1280
1281static PyObject *
1282string_translate(self, args)
1283 PyStringObject *self;
1284 PyObject *args;
1285{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286 register char *input, *output;
1287 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 register int i, c, changed = 0;
1289 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001290 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001291 int inlen, tablen, dellen = 0;
1292 PyObject *result;
1293 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001294 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 if (!PyArg_ParseTuple(args, "O|O:translate",
1297 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001298 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001299
1300 if (PyString_Check(tableobj)) {
1301 table1 = PyString_AS_STRING(tableobj);
1302 tablen = PyString_GET_SIZE(tableobj);
1303 }
1304 else if (PyUnicode_Check(tableobj)) {
1305 /* Unicode .translate() does not support the deletechars
1306 parameter; instead a mapping to None will cause characters
1307 to be deleted. */
1308 if (delobj != NULL) {
1309 PyErr_SetString(PyExc_TypeError,
1310 "deletions are implemented differently for unicode");
1311 return NULL;
1312 }
1313 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1314 }
1315 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001316 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001317
1318 if (delobj != NULL) {
1319 if (PyString_Check(delobj)) {
1320 del_table = PyString_AS_STRING(delobj);
1321 dellen = PyString_GET_SIZE(delobj);
1322 }
1323 else if (PyUnicode_Check(delobj)) {
1324 PyErr_SetString(PyExc_TypeError,
1325 "deletions are implemented differently for unicode");
1326 return NULL;
1327 }
1328 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1329 return NULL;
1330
1331 if (tablen != 256) {
1332 PyErr_SetString(PyExc_ValueError,
1333 "translation table must be 256 characters long");
1334 return NULL;
1335 }
1336 }
1337 else {
1338 del_table = NULL;
1339 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340 }
1341
1342 table = table1;
1343 inlen = PyString_Size(input_obj);
1344 result = PyString_FromStringAndSize((char *)NULL, inlen);
1345 if (result == NULL)
1346 return NULL;
1347 output_start = output = PyString_AsString(result);
1348 input = PyString_AsString(input_obj);
1349
1350 if (dellen == 0) {
1351 /* If no deletions are required, use faster code */
1352 for (i = inlen; --i >= 0; ) {
1353 c = Py_CHARMASK(*input++);
1354 if (Py_CHARMASK((*output++ = table[c])) != c)
1355 changed = 1;
1356 }
1357 if (changed)
1358 return result;
1359 Py_DECREF(result);
1360 Py_INCREF(input_obj);
1361 return input_obj;
1362 }
1363
1364 for (i = 0; i < 256; i++)
1365 trans_table[i] = Py_CHARMASK(table[i]);
1366
1367 for (i = 0; i < dellen; i++)
1368 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1369
1370 for (i = inlen; --i >= 0; ) {
1371 c = Py_CHARMASK(*input++);
1372 if (trans_table[c] != -1)
1373 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1374 continue;
1375 changed = 1;
1376 }
1377 if (!changed) {
1378 Py_DECREF(result);
1379 Py_INCREF(input_obj);
1380 return input_obj;
1381 }
1382 /* Fix the size of the resulting string */
1383 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1384 return NULL;
1385 return result;
1386}
1387
1388
1389/* What follows is used for implementing replace(). Perry Stoll. */
1390
/*
  mymemfind

  memmem()-like search over raw memory (no NUL terminators assumed).

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of the match within MEM, or -1 when
  there is none -- in particular when PAT_LEN is greater than LEN.
  PAT_LEN must be at least 1: pat[0] is read unconditionally.
*/
static int
mymemfind(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    /* a match cannot begin inside the final pat_len-1 bytes */
    int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0]
            && (pat_len == 1
                || memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0))
            return pos;
        pos++;
    }
    return -1;
}
1422
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT (PAT_LEN bytes) in MEM
  (LEN bytes), scanning left to right.

  Examples: mem=1111  and pat=11 gives 2;
            mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    int hits = 0;

    while (len >= 0) {
        int at = mymemfind(mem, len, pat, pat_len);

        if (at == -1)
            break;
        /* resume right after this match so matches never overlap */
        mem += at + pat_len;
        len -= at + pat_len;
        hits++;
    }
    return hits;
}
1450
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "replace all".

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if no replacement is to be made, the original string is
   returned.  Otherwise, a new buffer is allocated here with
   PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
    char *str;
    int len;     /* input string */
    char *pat;
    int pat_len; /* pattern string to find */
    char *sub;
    int sub_len; /* substitution string */
    int count;   /* number of replacements */
    int *out_len;

{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;
    new_len = len + nfound*(sub_len - pat_len);

    /* new_len is 0 when every byte is replaced by an empty SUB.  A
     * zero-byte allocation may return NULL, which the caller would
     * take for an out-of-memory error, so always ask for >= 1 byte.
     */
    new_s = (char *)PyMem_MALLOC(new_len ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return str;
}
1533
1534
1535static char replace__doc__[] =
1536"S.replace (old, new[, maxsplit]) -> string\n\
1537\n\
1538Return a copy of string S with all occurrences of substring\n\
1539old replaced by new. If the optional argument maxsplit is\n\
1540given, only the first maxsplit occurrences are replaced.";
1541
1542static PyObject *
1543string_replace(self, args)
1544 PyStringObject *self;
1545 PyObject *args;
1546{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001547 const char *str = PyString_AS_STRING(self), *sub, *repl;
1548 char *new_s;
1549 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1550 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 if (!PyArg_ParseTuple(args, "OO|i:replace",
1555 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001556 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001557
1558 if (PyString_Check(subobj)) {
1559 sub = PyString_AS_STRING(subobj);
1560 sub_len = PyString_GET_SIZE(subobj);
1561 }
1562 else if (PyUnicode_Check(subobj))
1563 return PyUnicode_Replace((PyObject *)self,
1564 subobj, replobj, count);
1565 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1566 return NULL;
1567
1568 if (PyString_Check(replobj)) {
1569 repl = PyString_AS_STRING(replobj);
1570 repl_len = PyString_GET_SIZE(replobj);
1571 }
1572 else if (PyUnicode_Check(replobj))
1573 return PyUnicode_Replace((PyObject *)self,
1574 subobj, replobj, count);
1575 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1576 return NULL;
1577
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001578 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001579 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001580 return NULL;
1581 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001582 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583 if (new_s == NULL) {
1584 PyErr_NoMemory();
1585 return NULL;
1586 }
1587 if (out_len == -1) {
1588 /* we're returning another reference to self */
1589 new = (PyObject*)self;
1590 Py_INCREF(new);
1591 }
1592 else {
1593 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001594 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595 }
1596 return new;
1597}
1598
1599
1600static char startswith__doc__[] =
1601"S.startswith(prefix[, start[, end]]) -> int\n\
1602\n\
1603Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1604optional start, test S beginning at that position. With optional end, stop\n\
1605comparing S at that position.";
1606
1607static PyObject *
1608string_startswith(self, args)
1609 PyStringObject *self;
1610 PyObject *args;
1611{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001612 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 int plen;
1616 int start = 0;
1617 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001618 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619
Guido van Rossum4c08d552000-03-10 22:55:18 +00001620 if (!PyArg_ParseTuple(args, "O|ii:startswith", &subobj, &start, &end))
1621 return NULL;
1622 if (PyString_Check(subobj)) {
1623 prefix = PyString_AS_STRING(subobj);
1624 plen = PyString_GET_SIZE(subobj);
1625 }
1626 else if (PyUnicode_Check(subobj))
1627 return PyInt_FromLong(
1628 PyUnicode_Tailmatch((PyObject *)self,
1629 subobj, start, end, -1));
1630 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631 return NULL;
1632
1633 /* adopt Java semantics for index out of range. it is legal for
1634 * offset to be == plen, but this only returns true if prefix is
1635 * the empty string.
1636 */
1637 if (start < 0 || start+plen > len)
1638 return PyInt_FromLong(0);
1639
1640 if (!memcmp(str+start, prefix, plen)) {
1641 /* did the match end after the specified end? */
1642 if (end < 0)
1643 return PyInt_FromLong(1);
1644 else if (end - start < plen)
1645 return PyInt_FromLong(0);
1646 else
1647 return PyInt_FromLong(1);
1648 }
1649 else return PyInt_FromLong(0);
1650}
1651
1652
1653static char endswith__doc__[] =
1654"S.endswith(suffix[, start[, end]]) -> int\n\
1655\n\
1656Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1657optional start, test S beginning at that position. With optional end, stop\n\
1658comparing S at that position.";
1659
1660static PyObject *
1661string_endswith(self, args)
1662 PyStringObject *self;
1663 PyObject *args;
1664{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001666 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001667 const char* suffix;
1668 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669 int start = 0;
1670 int end = -1;
1671 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001672 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001673
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674 if (!PyArg_ParseTuple(args, "O|ii:endswith", &subobj, &start, &end))
1675 return NULL;
1676 if (PyString_Check(subobj)) {
1677 suffix = PyString_AS_STRING(subobj);
1678 slen = PyString_GET_SIZE(subobj);
1679 }
1680 else if (PyUnicode_Check(subobj))
1681 return PyInt_FromLong(
1682 PyUnicode_Tailmatch((PyObject *)self,
1683 subobj, start, end, +1));
1684 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001685 return NULL;
1686
Guido van Rossum4c08d552000-03-10 22:55:18 +00001687 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688 return PyInt_FromLong(0);
1689
1690 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001691 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694 return PyInt_FromLong(1);
1695 else return PyInt_FromLong(0);
1696}
1697
1698
Guido van Rossum4c08d552000-03-10 22:55:18 +00001699static char expandtabs__doc__[] =
1700"S.expandtabs([tabsize]) -> string\n\
1701\n\
1702Return a copy of S where all tab characters are expanded using spaces.\n\
1703If tabsize is not given, a tab size of 8 characters is assumed.";
1704
1705static PyObject*
1706string_expandtabs(PyStringObject *self, PyObject *args)
1707{
1708 const char *e, *p;
1709 char *q;
1710 int i, j;
1711 PyObject *u;
1712 int tabsize = 8;
1713
1714 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1715 return NULL;
1716
1717 /* First pass: determine size of ouput string */
1718 i = j = 0;
1719 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1720 for (p = PyString_AS_STRING(self); p < e; p++)
1721 if (*p == '\t') {
1722 if (tabsize > 0)
1723 j += tabsize - (j % tabsize);
1724 }
1725 else {
1726 j++;
1727 if (*p == '\n' || *p == '\r') {
1728 i += j;
1729 j = 0;
1730 }
1731 }
1732
1733 /* Second pass: create output string and fill it */
1734 u = PyString_FromStringAndSize(NULL, i + j);
1735 if (!u)
1736 return NULL;
1737
1738 j = 0;
1739 q = PyString_AS_STRING(u);
1740
1741 for (p = PyString_AS_STRING(self); p < e; p++)
1742 if (*p == '\t') {
1743 if (tabsize > 0) {
1744 i = tabsize - (j % tabsize);
1745 j += i;
1746 while (i--)
1747 *q++ = ' ';
1748 }
1749 }
1750 else {
1751 j++;
1752 *q++ = *p;
1753 if (*p == '\n' || *p == '\r')
1754 j = 0;
1755 }
1756
1757 return u;
1758}
1759
1760static
1761PyObject *pad(PyStringObject *self,
1762 int left,
1763 int right,
1764 char fill)
1765{
1766 PyObject *u;
1767
1768 if (left < 0)
1769 left = 0;
1770 if (right < 0)
1771 right = 0;
1772
1773 if (left == 0 && right == 0) {
1774 Py_INCREF(self);
1775 return (PyObject *)self;
1776 }
1777
1778 u = PyString_FromStringAndSize(NULL,
1779 left + PyString_GET_SIZE(self) + right);
1780 if (u) {
1781 if (left)
1782 memset(PyString_AS_STRING(u), fill, left);
1783 memcpy(PyString_AS_STRING(u) + left,
1784 PyString_AS_STRING(self),
1785 PyString_GET_SIZE(self));
1786 if (right)
1787 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1788 fill, right);
1789 }
1790
1791 return u;
1792}
1793
1794static char ljust__doc__[] =
1795"S.ljust(width) -> string\n\
1796\n\
1797Return S left justified in a string of length width. Padding is\n\
1798done using spaces.";
1799
1800static PyObject *
1801string_ljust(PyStringObject *self, PyObject *args)
1802{
1803 int width;
1804 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1805 return NULL;
1806
1807 if (PyString_GET_SIZE(self) >= width) {
1808 Py_INCREF(self);
1809 return (PyObject*) self;
1810 }
1811
1812 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1813}
1814
1815
1816static char rjust__doc__[] =
1817"S.rjust(width) -> string\n\
1818\n\
1819Return S right justified in a string of length width. Padding is\n\
1820done using spaces.";
1821
1822static PyObject *
1823string_rjust(PyStringObject *self, PyObject *args)
1824{
1825 int width;
1826 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1827 return NULL;
1828
1829 if (PyString_GET_SIZE(self) >= width) {
1830 Py_INCREF(self);
1831 return (PyObject*) self;
1832 }
1833
1834 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1835}
1836
1837
1838static char center__doc__[] =
1839"S.center(width) -> string\n\
1840\n\
1841Return S centered in a string of length width. Padding is done\n\
1842using spaces.";
1843
1844static PyObject *
1845string_center(PyStringObject *self, PyObject *args)
1846{
1847 int marg, left;
1848 int width;
1849
1850 if (!PyArg_ParseTuple(args, "i:center", &width))
1851 return NULL;
1852
1853 if (PyString_GET_SIZE(self) >= width) {
1854 Py_INCREF(self);
1855 return (PyObject*) self;
1856 }
1857
1858 marg = width - PyString_GET_SIZE(self);
1859 left = marg / 2 + (marg & width & 1);
1860
1861 return pad(self, left, marg - left, ' ');
1862}
1863
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out).
   Left-pads with '0' via pad() and, when the original text starts
   with '+' or '-', moves that sign in front of the zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int nzeros;
    char *text;
    PyObject *filled;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    nzeros = width - PyString_GET_SIZE(self);
    filled = pad(self, nzeros, 0, '0');
    if (filled == NULL)
        return NULL;

    /* text[nzeros] is the first byte of the original string */
    text = PyString_AS_STRING(filled);
    if (text[nzeros] == '+' || text[nzeros] == '-') {
        /* move sign to beginning of string */
        text[0] = text[nzeros];
        text[nzeros] = '0';
    }

    return filled;
}
#endif
1903
1904static char isspace__doc__[] =
1905"S.isspace() -> int\n\
1906\n\
1907Return 1 if there are only whitespace characters in S,\n\
19080 otherwise.";
1909
1910static PyObject*
1911string_isspace(PyStringObject *self, PyObject *args)
1912{
1913 register const char *p = PyString_AS_STRING(self);
1914 register const char *e;
1915
1916 if (!PyArg_NoArgs(args))
1917 return NULL;
1918
1919 /* Shortcut for single character strings */
1920 if (PyString_GET_SIZE(self) == 1 &&
1921 isspace(*p))
1922 return PyInt_FromLong(1);
1923
1924 e = p + PyString_GET_SIZE(self);
1925 for (; p < e; p++) {
1926 if (!isspace(*p))
1927 return PyInt_FromLong(0);
1928 }
1929 return PyInt_FromLong(1);
1930}
1931
1932
1933static char isdigit__doc__[] =
1934"S.isdigit() -> int\n\
1935\n\
1936Return 1 if there are only digit characters in S,\n\
19370 otherwise.";
1938
1939static PyObject*
1940string_isdigit(PyStringObject *self, PyObject *args)
1941{
1942 register const char *p = PyString_AS_STRING(self);
1943 register const char *e;
1944
1945 if (!PyArg_NoArgs(args))
1946 return NULL;
1947
1948 /* Shortcut for single character strings */
1949 if (PyString_GET_SIZE(self) == 1 &&
1950 isdigit(*p))
1951 return PyInt_FromLong(1);
1952
1953 e = p + PyString_GET_SIZE(self);
1954 for (; p < e; p++) {
1955 if (!isdigit(*p))
1956 return PyInt_FromLong(0);
1957 }
1958 return PyInt_FromLong(1);
1959}
1960
1961
1962static char islower__doc__[] =
1963"S.islower() -> int\n\
1964\n\
1965Return 1 if all cased characters in S are lowercase and there is\n\
1966at least one cased character in S, 0 otherwise.";
1967
1968static PyObject*
1969string_islower(PyStringObject *self, PyObject *args)
1970{
1971 register const char *p = PyString_AS_STRING(self);
1972 register const char *e;
1973 int cased;
1974
1975 if (!PyArg_NoArgs(args))
1976 return NULL;
1977
1978 /* Shortcut for single character strings */
1979 if (PyString_GET_SIZE(self) == 1)
1980 return PyInt_FromLong(islower(*p) != 0);
1981
1982 e = p + PyString_GET_SIZE(self);
1983 cased = 0;
1984 for (; p < e; p++) {
1985 if (isupper(*p))
1986 return PyInt_FromLong(0);
1987 else if (!cased && islower(*p))
1988 cased = 1;
1989 }
1990 return PyInt_FromLong(cased);
1991}
1992
1993
1994static char isupper__doc__[] =
1995"S.isupper() -> int\n\
1996\n\
1997Return 1 if all cased characters in S are uppercase and there is\n\
1998at least one cased character in S, 0 otherwise.";
1999
2000static PyObject*
2001string_isupper(PyStringObject *self, PyObject *args)
2002{
2003 register const char *p = PyString_AS_STRING(self);
2004 register const char *e;
2005 int cased;
2006
2007 if (!PyArg_NoArgs(args))
2008 return NULL;
2009
2010 /* Shortcut for single character strings */
2011 if (PyString_GET_SIZE(self) == 1)
2012 return PyInt_FromLong(isupper(*p) != 0);
2013
2014 e = p + PyString_GET_SIZE(self);
2015 cased = 0;
2016 for (; p < e; p++) {
2017 if (islower(*p))
2018 return PyInt_FromLong(0);
2019 else if (!cased && isupper(*p))
2020 cased = 1;
2021 }
2022 return PyInt_FromLong(cased);
2023}
2024
2025
2026static char istitle__doc__[] =
2027"S.istitle() -> int\n\
2028\n\
2029Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2030may only follow uncased characters and lowercase characters only cased\n\
2031ones. Return 0 otherwise.";
2032
2033static PyObject*
2034string_istitle(PyStringObject *self, PyObject *args)
2035{
2036 register const char *p = PyString_AS_STRING(self);
2037 register const char *e;
2038 int cased, previous_is_cased;
2039
2040 if (!PyArg_NoArgs(args))
2041 return NULL;
2042
2043 /* Shortcut for single character strings */
2044 if (PyString_GET_SIZE(self) == 1)
2045 return PyInt_FromLong(isupper(*p) != 0);
2046
2047 e = p + PyString_GET_SIZE(self);
2048 cased = 0;
2049 previous_is_cased = 0;
2050 for (; p < e; p++) {
2051 register const char ch = *p;
2052
2053 if (isupper(ch)) {
2054 if (previous_is_cased)
2055 return PyInt_FromLong(0);
2056 previous_is_cased = 1;
2057 cased = 1;
2058 }
2059 else if (islower(ch)) {
2060 if (!previous_is_cased)
2061 return PyInt_FromLong(0);
2062 previous_is_cased = 1;
2063 cased = 1;
2064 }
2065 else
2066 previous_is_cased = 0;
2067 }
2068 return PyInt_FromLong(cased);
2069}
2070
2071
2072static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002073"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002074\n\
2075Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002076Line breaks are not included in the resulting list unless keepends\n\
2077is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078
2079#define SPLIT_APPEND(data, left, right) \
2080 str = PyString_FromStringAndSize(data + left, right - left); \
2081 if (!str) \
2082 goto onError; \
2083 if (PyList_Append(list, str)) { \
2084 Py_DECREF(str); \
2085 goto onError; \
2086 } \
2087 else \
2088 Py_DECREF(str);
2089
2090static PyObject*
2091string_splitlines(PyStringObject *self, PyObject *args)
2092{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002093 register int i;
2094 register int j;
2095 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002096 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002097 PyObject *list;
2098 PyObject *str;
2099 char *data;
2100
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002101 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002102 return NULL;
2103
2104 data = PyString_AS_STRING(self);
2105 len = PyString_GET_SIZE(self);
2106
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 list = PyList_New(0);
2108 if (!list)
2109 goto onError;
2110
2111 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002112 int eol;
2113
Guido van Rossum4c08d552000-03-10 22:55:18 +00002114 /* Find a line and append it */
2115 while (i < len && data[i] != '\n' && data[i] != '\r')
2116 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002117
2118 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002119 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002120 if (i < len) {
2121 if (data[i] == '\r' && i + 1 < len &&
2122 data[i+1] == '\n')
2123 i += 2;
2124 else
2125 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002126 if (keepends)
2127 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002128 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002129 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130 j = i;
2131 }
2132 if (j < len) {
2133 SPLIT_APPEND(data, j, len);
2134 }
2135
2136 return list;
2137
2138 onError:
2139 Py_DECREF(list);
2140 return NULL;
2141}
2142
2143#undef SPLIT_APPEND
2144
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145
2146static PyMethodDef
2147string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002148 /* Counterparts of the obsolete stropmodule functions; except
2149 string.maketrans(). */
2150 {"join", (PyCFunction)string_join, 1, join__doc__},
2151 {"split", (PyCFunction)string_split, 1, split__doc__},
2152 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2153 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2154 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2155 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2156 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2157 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2158 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2160 {"count", (PyCFunction)string_count, 1, count__doc__},
2161 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2162 {"find", (PyCFunction)string_find, 1, find__doc__},
2163 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2166 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2167 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2168 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2170 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2171 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2173 {"title", (PyCFunction)string_title, 1, title__doc__},
2174 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2175 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2176 {"center", (PyCFunction)string_center, 1, center__doc__},
2177 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2178 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2179#if 0
2180 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2181#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182 {NULL, NULL} /* sentinel */
2183};
2184
2185static PyObject *
2186string_getattr(s, name)
2187 PyStringObject *s;
2188 char *name;
2189{
2190 return Py_FindMethod(string_methods, (PyObject*)s, name);
2191}
2192
2193
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002194PyTypeObject PyString_Type = {
2195 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002196 0,
2197 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002198 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002199 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002200 (destructor)string_dealloc, /*tp_dealloc*/
2201 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002203 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002204 (cmpfunc)string_compare, /*tp_compare*/
2205 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002206 0, /*tp_as_number*/
2207 &string_as_sequence, /*tp_as_sequence*/
2208 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002209 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002210 0, /*tp_call*/
2211 0, /*tp_str*/
2212 0, /*tp_getattro*/
2213 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002214 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002215 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002216 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002217};
2218
2219void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002220PyString_Concat(pv, w)
2221 register PyObject **pv;
2222 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002223{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002224 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002225 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002226 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002227 if (w == NULL || !PyString_Check(*pv)) {
2228 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002229 *pv = NULL;
2230 return;
2231 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002232 v = string_concat((PyStringObject *) *pv, w);
2233 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002234 *pv = v;
2235}
2236
Guido van Rossum013142a1994-08-30 08:19:36 +00002237void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002238PyString_ConcatAndDel(pv, w)
2239 register PyObject **pv;
2240 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002241{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002242 PyString_Concat(pv, w);
2243 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002244}
2245
2246
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002247/* The following function breaks the notion that strings are immutable:
2248 it changes the size of a string. We get away with this only if there
2249 is only one module referencing the object. You can also think of it
2250 as creating a new string object and destroying the old one, only
2251 more efficiently. In any case, don't use this if the string may
2252 already be known to some other part of the code... */
2253
2254int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002255_PyString_Resize(pv, newsize)
2256 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002257 int newsize;
2258{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002259 register PyObject *v;
2260 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002261 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002262 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002263 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002264 Py_DECREF(v);
2265 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002266 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002267 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002268 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002269#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002270 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002271#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002272 _Py_ForgetReference(v);
2273 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002274 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002275 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002276 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002277 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002278 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002279 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002280 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002281 _Py_NewReference(*pv);
2282 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002283 sv->ob_size = newsize;
2284 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002285 return 0;
2286}
Guido van Rossume5372401993-03-16 12:15:04 +00002287
2288/* Helpers for formatstring */
2289
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002290static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002291getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002292 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002293 int arglen;
2294 int *p_argidx;
2295{
2296 int argidx = *p_argidx;
2297 if (argidx < arglen) {
2298 (*p_argidx)++;
2299 if (arglen < 0)
2300 return args;
2301 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002302 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002303 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002304 PyErr_SetString(PyExc_TypeError,
2305 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002306 return NULL;
2307}
2308
/* Bit flags recording which flag characters were seen in a format
   specifier. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
2314
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002315static int
2316formatfloat(buf, flags, prec, type, v)
2317 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002318 int flags;
2319 int prec;
2320 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002321 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002322{
2323 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002324 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002325 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002326 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002327 if (prec < 0)
2328 prec = 6;
2329 if (prec > 50)
2330 prec = 50; /* Arbitrary limitation */
2331 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2332 type = 'g';
2333 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2334 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002335 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002336}
2337
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002338static int
2339formatint(buf, flags, prec, type, v)
2340 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002341 int flags;
2342 int prec;
2343 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002344 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002345{
2346 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002347 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002348 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002349 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002350 if (prec < 0)
2351 prec = 1;
2352 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2353 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002354 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002355}
2356
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002357static int
2358formatchar(buf, v)
2359 char *buf;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002360 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002361{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002362 if (PyString_Check(v)) {
2363 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002364 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002365 }
2366 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002367 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002368 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002369 }
2370 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002371 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002372}
2373
Guido van Rossum013142a1994-08-30 08:19:36 +00002374
Guido van Rossume5372401993-03-16 12:15:04 +00002375/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
2376
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002377PyObject *
2378PyString_Format(format, args)
2379 PyObject *format;
2380 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002381{
2382 char *fmt, *res;
2383 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002384 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002385 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002386 PyObject *dict = NULL;
2387 if (format == NULL || !PyString_Check(format) || args == NULL) {
2388 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002389 return NULL;
2390 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002391 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002392 fmt = PyString_AsString(format);
2393 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002394 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002395 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002396 if (result == NULL)
2397 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002398 res = PyString_AsString(result);
2399 if (PyTuple_Check(args)) {
2400 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002401 argidx = 0;
2402 }
2403 else {
2404 arglen = -1;
2405 argidx = -2;
2406 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002407 if (args->ob_type->tp_as_mapping)
2408 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002409 while (--fmtcnt >= 0) {
2410 if (*fmt != '%') {
2411 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002412 rescnt = fmtcnt + 100;
2413 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002414 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002415 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002416 res = PyString_AsString(result)
2417 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002418 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002419 }
2420 *res++ = *fmt++;
2421 }
2422 else {
2423 /* Got a format specifier */
2424 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002425 int width = -1;
2426 int prec = -1;
2427 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002428 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002429 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002430 PyObject *v = NULL;
2431 PyObject *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +00002432 char *buf;
2433 int sign;
2434 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002435 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002436 char *fmt_start = fmt;
2437
Guido van Rossumda9c2711996-12-05 21:58:58 +00002438 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002439 if (*fmt == '(') {
2440 char *keystart;
2441 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002442 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002443 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002444
2445 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002446 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002447 "format requires a mapping");
2448 goto error;
2449 }
2450 ++fmt;
2451 --fmtcnt;
2452 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002453 /* Skip over balanced parentheses */
2454 while (pcount > 0 && --fmtcnt >= 0) {
2455 if (*fmt == ')')
2456 --pcount;
2457 else if (*fmt == '(')
2458 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002459 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002460 }
2461 keylen = fmt - keystart - 1;
2462 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002463 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002464 "incomplete format key");
2465 goto error;
2466 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002467 key = PyString_FromStringAndSize(keystart,
2468 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002469 if (key == NULL)
2470 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002471 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002472 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002473 args_owned = 0;
2474 }
2475 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002476 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002477 if (args == NULL) {
2478 goto error;
2479 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002480 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002481 arglen = -1;
2482 argidx = -2;
2483 }
Guido van Rossume5372401993-03-16 12:15:04 +00002484 while (--fmtcnt >= 0) {
2485 switch (c = *fmt++) {
2486 case '-': flags |= F_LJUST; continue;
2487 case '+': flags |= F_SIGN; continue;
2488 case ' ': flags |= F_BLANK; continue;
2489 case '#': flags |= F_ALT; continue;
2490 case '0': flags |= F_ZERO; continue;
2491 }
2492 break;
2493 }
2494 if (c == '*') {
2495 v = getnextarg(args, arglen, &argidx);
2496 if (v == NULL)
2497 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002498 if (!PyInt_Check(v)) {
2499 PyErr_SetString(PyExc_TypeError,
2500 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002501 goto error;
2502 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002503 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002504 if (width < 0) {
2505 flags |= F_LJUST;
2506 width = -width;
2507 }
Guido van Rossume5372401993-03-16 12:15:04 +00002508 if (--fmtcnt >= 0)
2509 c = *fmt++;
2510 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002511 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002512 width = c - '0';
2513 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002514 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002515 if (!isdigit(c))
2516 break;
2517 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002518 PyErr_SetString(
2519 PyExc_ValueError,
2520 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002521 goto error;
2522 }
2523 width = width*10 + (c - '0');
2524 }
2525 }
2526 if (c == '.') {
2527 prec = 0;
2528 if (--fmtcnt >= 0)
2529 c = *fmt++;
2530 if (c == '*') {
2531 v = getnextarg(args, arglen, &argidx);
2532 if (v == NULL)
2533 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002534 if (!PyInt_Check(v)) {
2535 PyErr_SetString(
2536 PyExc_TypeError,
2537 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002538 goto error;
2539 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002540 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002541 if (prec < 0)
2542 prec = 0;
2543 if (--fmtcnt >= 0)
2544 c = *fmt++;
2545 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002546 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002547 prec = c - '0';
2548 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002549 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002550 if (!isdigit(c))
2551 break;
2552 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002553 PyErr_SetString(
2554 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002555 "prec too big");
2556 goto error;
2557 }
2558 prec = prec*10 + (c - '0');
2559 }
2560 }
2561 } /* prec */
2562 if (fmtcnt >= 0) {
2563 if (c == 'h' || c == 'l' || c == 'L') {
2564 size = c;
2565 if (--fmtcnt >= 0)
2566 c = *fmt++;
2567 }
2568 }
2569 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002570 PyErr_SetString(PyExc_ValueError,
2571 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002572 goto error;
2573 }
2574 if (c != '%') {
2575 v = getnextarg(args, arglen, &argidx);
2576 if (v == NULL)
2577 goto error;
2578 }
2579 sign = 0;
2580 fill = ' ';
2581 switch (c) {
2582 case '%':
2583 buf = "%";
2584 len = 1;
2585 break;
2586 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002587 case 'r':
2588 if (PyUnicode_Check(v)) {
2589 fmt = fmt_start;
2590 goto unicode;
2591 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002592 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002593 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002594 else
2595 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002596 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002597 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002598 if (!PyString_Check(temp)) {
2599 PyErr_SetString(PyExc_TypeError,
2600 "%s argument has non-string str()");
2601 goto error;
2602 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002603 buf = PyString_AsString(temp);
2604 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002605 if (prec >= 0 && len > prec)
2606 len = prec;
2607 break;
2608 case 'i':
2609 case 'd':
2610 case 'u':
2611 case 'o':
2612 case 'x':
2613 case 'X':
2614 if (c == 'i')
2615 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002616 buf = tmpbuf;
2617 len = formatint(buf, flags, prec, c, v);
2618 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002619 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002620 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002621 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002622 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002623 if ((flags&F_ALT) &&
2624 (c == 'x' || c == 'X') &&
2625 buf[0] == '0' && buf[1] == c) {
2626 *res++ = *buf++;
2627 *res++ = *buf++;
2628 rescnt -= 2;
2629 len -= 2;
2630 width -= 2;
2631 if (width < 0)
2632 width = 0;
2633 }
2634 }
Guido van Rossume5372401993-03-16 12:15:04 +00002635 break;
2636 case 'e':
2637 case 'E':
2638 case 'f':
2639 case 'g':
2640 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002641 buf = tmpbuf;
2642 len = formatfloat(buf, flags, prec, c, v);
2643 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002644 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002645 sign = 1;
2646 if (flags&F_ZERO)
2647 fill = '0';
2648 break;
2649 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002650 buf = tmpbuf;
2651 len = formatchar(buf, v);
2652 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002653 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002654 break;
2655 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002656 PyErr_Format(PyExc_ValueError,
2657 "unsupported format character '%c' (0x%x)",
2658 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002659 goto error;
2660 }
2661 if (sign) {
2662 if (*buf == '-' || *buf == '+') {
2663 sign = *buf++;
2664 len--;
2665 }
2666 else if (flags & F_SIGN)
2667 sign = '+';
2668 else if (flags & F_BLANK)
2669 sign = ' ';
2670 else
2671 sign = '\0';
2672 }
2673 if (width < len)
2674 width = len;
2675 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002676 reslen -= rescnt;
2677 rescnt = width + fmtcnt + 100;
2678 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002679 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002680 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002681 res = PyString_AsString(result)
2682 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002683 }
2684 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002685 if (fill != ' ')
2686 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002687 rescnt--;
2688 if (width > len)
2689 width--;
2690 }
2691 if (width > len && !(flags&F_LJUST)) {
2692 do {
2693 --rescnt;
2694 *res++ = fill;
2695 } while (--width > len);
2696 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002697 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002698 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002699 memcpy(res, buf, len);
2700 res += len;
2701 rescnt -= len;
2702 while (--width >= len) {
2703 --rescnt;
2704 *res++ = ' ';
2705 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002706 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002707 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002708 "not all arguments converted");
2709 goto error;
2710 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002711 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002712 } /* '%' */
2713 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002714 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002715 PyErr_SetString(PyExc_TypeError,
2716 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002717 goto error;
2718 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002719 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002720 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002721 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002722 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002723 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002724
2725 unicode:
2726 if (args_owned) {
2727 Py_DECREF(args);
2728 args_owned = 0;
2729 }
2730 /* Fiddle args right (remove the first argidx-1 arguments) */
2731 --argidx;
2732 if (PyTuple_Check(orig_args) && argidx > 0) {
2733 PyObject *v;
2734 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2735 v = PyTuple_New(n);
2736 if (v == NULL)
2737 goto error;
2738 while (--n >= 0) {
2739 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2740 Py_INCREF(w);
2741 PyTuple_SET_ITEM(v, n, w);
2742 }
2743 args = v;
2744 } else {
2745 Py_INCREF(orig_args);
2746 args = orig_args;
2747 }
2748 /* Paste rest of format string to what we have of the result
2749 string; we reuse result for this */
2750 rescnt = res - PyString_AS_STRING(result);
2751 fmtcnt = PyString_GET_SIZE(format) - \
2752 (fmt - PyString_AS_STRING(format));
2753 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2754 Py_DECREF(args);
2755 goto error;
2756 }
2757 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2758 format = result;
2759 /* Let Unicode do its magic */
2760 result = PyUnicode_Format(format, args);
2761 Py_DECREF(format);
2762 Py_DECREF(args);
2763 return result;
2764
Guido van Rossume5372401993-03-16 12:15:04 +00002765 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002766 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002767 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002768 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002769 }
Guido van Rossume5372401993-03-16 12:15:04 +00002770 return NULL;
2771}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002772
2773
2774#ifdef INTERN_STRINGS
2775
2776static PyObject *interned;
2777
2778void
2779PyString_InternInPlace(p)
2780 PyObject **p;
2781{
2782 register PyStringObject *s = (PyStringObject *)(*p);
2783 PyObject *t;
2784 if (s == NULL || !PyString_Check(s))
2785 Py_FatalError("PyString_InternInPlace: strings only please!");
2786 if ((t = s->ob_sinterned) != NULL) {
2787 if (t == (PyObject *)s)
2788 return;
2789 Py_INCREF(t);
2790 *p = t;
2791 Py_DECREF(s);
2792 return;
2793 }
2794 if (interned == NULL) {
2795 interned = PyDict_New();
2796 if (interned == NULL)
2797 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002798 }
2799 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2800 Py_INCREF(t);
2801 *p = s->ob_sinterned = t;
2802 Py_DECREF(s);
2803 return;
2804 }
2805 t = (PyObject *)s;
2806 if (PyDict_SetItem(interned, t, t) == 0) {
2807 s->ob_sinterned = t;
2808 return;
2809 }
2810 PyErr_Clear();
2811}
2812
2813
2814PyObject *
2815PyString_InternFromString(cp)
2816 const char *cp;
2817{
2818 PyObject *s = PyString_FromString(cp);
2819 if (s == NULL)
2820 return NULL;
2821 PyString_InternInPlace(&s);
2822 return s;
2823}
2824
2825#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002826
2827void
2828PyString_Fini()
2829{
2830 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002831 for (i = 0; i < UCHAR_MAX + 1; i++) {
2832 Py_XDECREF(characters[i]);
2833 characters[i] = NULL;
2834 }
2835#ifndef DONT_SHARE_SHORT_STRINGS
2836 Py_XDECREF(nullstring);
2837 nullstring = NULL;
2838#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002839#ifdef INTERN_STRINGS
2840 if (interned) {
2841 int pos, changed;
2842 PyObject *key, *value;
2843 do {
2844 changed = 0;
2845 pos = 0;
2846 while (PyDict_Next(interned, &pos, &key, &value)) {
2847 if (key->ob_refcnt == 2 && key == value) {
2848 PyDict_DelItem(interned, key);
2849 changed = 1;
2850 }
2851 }
2852 } while (changed);
2853 }
2854#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002855}