blob: f17fbf1a062ec69f3d9590c461a6ad0293546e29 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum71160aa1997-06-03 18:03:18 +000036#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000037#include <ctype.h>
38
#ifdef COUNT_ALLOCS
/* Allocation statistics, compiled in only with COUNT_ALLOCS:
   null_strings counts requests answered with the shared empty string,
   one_strings counts requests answered with a cached one-character
   string. */
int null_strings;
int one_strings;
#endif
42
Guido van Rossum03093a21994-09-28 15:51:32 +000043#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000044#include <limits.h>
45#else
46#ifndef UCHAR_MAX
47#define UCHAR_MAX 255
48#endif
49#endif
50
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000052#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071PyObject *
72PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000073 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074 int size;
75{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000077#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
92 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000095
96 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000098 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000099 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000101 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102#ifdef CACHE_HASH
103 op->ob_shash = -1;
104#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000105#ifdef INTERN_STRINGS
106 op->ob_sinterned = NULL;
107#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000108 if (str != NULL)
109 memcpy(op->ob_sval, str, size);
110 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0) {
113 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 } else if (size == 1 && str != NULL) {
116 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121}
122
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123PyObject *
124PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000125 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126{
127 register unsigned int size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000129#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 if (size == 0 && (op = nullstring) != NULL) {
131#ifdef COUNT_ALLOCS
132 null_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138#ifdef COUNT_ALLOCS
139 one_strings++;
140#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000144#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145
146 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000148 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000149 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000151 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152#ifdef CACHE_HASH
153 op->ob_shash = -1;
154#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000155#ifdef INTERN_STRINGS
156 op->ob_sinterned = NULL;
157#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000158 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000159#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 if (size == 0) {
161 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000162 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000163 } else if (size == 1) {
164 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000165 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000166 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000167#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000168 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169}
170
Guido van Rossum234f9421993-06-17 12:35:49 +0000171static void
Guido van Rossume5372401993-03-16 12:15:04 +0000172string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000173 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000174{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000175 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000176}
177
Guido van Rossumd7047b31995-01-02 19:07:15 +0000178int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000179PyString_Size(op)
180 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000182 if (!PyString_Check(op)) {
183 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184 return -1;
185 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000186 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000187}
188
189/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000190PyString_AsString(op)
191 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000193 if (!PyString_Check(op)) {
194 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 return NULL;
196 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000197 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198}
199
200/* Methods */
201
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000202static int
Guido van Rossume5372401993-03-16 12:15:04 +0000203string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000204 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 FILE *fp;
206 int flags;
207{
208 int i;
209 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000210 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000212 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000214 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216
217 /* figure out which quote to use; single is prefered */
218 quote = '\'';
219 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
220 quote = '"';
221
222 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 for (i = 0; i < op->ob_size; i++) {
224 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 fprintf(fp, "\\%c", c);
227 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000228 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000232 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000233 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234}
235
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000236static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000237string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239{
240 /* XXX overflow? */
241 int newsize = 2 + 4 * op->ob_size * sizeof(char);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000242 PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000244 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000245 }
246 else {
247 register int i;
248 register char c;
249 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000250 int quote;
251
252 /* figure out which quote to use; single is prefered */
253 quote = '\'';
254 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
255 quote = '"';
256
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000257 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 for (i = 0; i < op->ob_size; i++) {
260 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 *p++ = '\\', *p++ = c;
263 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000264 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 while (*p != '\0')
266 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 }
268 else
269 *p++ = c;
270 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000271 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000273 _PyString_Resize(
274 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000275 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277}
278
279static int
Guido van Rossume5372401993-03-16 12:15:04 +0000280string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282{
283 return a->ob_size;
284}
285
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000286static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000287string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000288 register PyStringObject *a;
289 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290{
291 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 register PyStringObject *op;
293 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000294 if (PyUnicode_Check(bb))
295 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000296 PyErr_BadArgument();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297 return NULL;
298 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000299#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 /* Optimize cases with empty left or right operand */
301 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000302 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303 return bb;
304 }
305 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000306 Py_INCREF(a);
307 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000308 }
309 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000310 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000311 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000312 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000313 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000314 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000315 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000316#ifdef CACHE_HASH
317 op->ob_shash = -1;
318#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000319#ifdef INTERN_STRINGS
320 op->ob_sinterned = NULL;
321#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000322 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
323 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
324 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326#undef b
327}
328
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000329static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000330string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000331 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000332 register int n;
333{
334 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000335 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000336 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 if (n < 0)
338 n = 0;
339 size = a->ob_size * n;
340 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000341 Py_INCREF(a);
342 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000343 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000344 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000346 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000347 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000348 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000349 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000350#ifdef CACHE_HASH
351 op->ob_shash = -1;
352#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000353#ifdef INTERN_STRINGS
354 op->ob_sinterned = NULL;
355#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000356 for (i = 0; i < size; i += a->ob_size)
357 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
358 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000359 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000360}
361
362/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
363
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000364static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000365string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000367 register int i, j; /* May be negative! */
368{
369 if (i < 0)
370 i = 0;
371 if (j < 0)
372 j = 0; /* Avoid signed/unsigned bug in next line */
373 if (j > a->ob_size)
374 j = a->ob_size;
375 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000376 Py_INCREF(a);
377 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000378 }
379 if (j < i)
380 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000381 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382}
383
Guido van Rossum9284a572000-03-07 15:53:43 +0000384static int
385string_contains(a, el)
386PyObject *a, *el;
387{
388 register char *s, *end;
389 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000390 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000391 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000392 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000393 PyErr_SetString(PyExc_TypeError,
394 "string member test needs char left operand");
395 return -1;
396 }
397 c = PyString_AsString(el)[0];
398 s = PyString_AsString(a);
399 end = s + PyString_Size(a);
400 while (s < end) {
401 if (c == *s++)
402 return 1;
403 }
404 return 0;
405}
406
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000407static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000408string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 register int i;
411{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000412 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000413 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000414 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 return NULL;
417 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000418 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000419 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000420#ifdef COUNT_ALLOCS
421 if (v != NULL)
422 one_strings++;
423#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000424 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000425 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000426 if (v == NULL)
427 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 characters[c] = (PyStringObject *) v;
429 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000430 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000431 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000432 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000433}
434
435static int
Guido van Rossume5372401993-03-16 12:15:04 +0000436string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000437 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000438{
Guido van Rossum253919f1991-02-13 23:18:39 +0000439 int len_a = a->ob_size, len_b = b->ob_size;
440 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000441 int cmp;
442 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000443 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000444 if (cmp == 0)
445 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
446 if (cmp != 0)
447 return cmp;
448 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000449 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000450}
451
Guido van Rossum9bfef441993-03-29 10:43:31 +0000452static long
453string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000454 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000455{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000456 register int len;
457 register unsigned char *p;
458 register long x;
459
460#ifdef CACHE_HASH
461 if (a->ob_shash != -1)
462 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000463#ifdef INTERN_STRINGS
464 if (a->ob_sinterned != NULL)
465 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000466 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000467#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000468#endif
469 len = a->ob_size;
470 p = (unsigned char *) a->ob_sval;
471 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000472 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000473 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000474 x ^= a->ob_size;
475 if (x == -1)
476 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000477#ifdef CACHE_HASH
478 a->ob_shash = x;
479#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000480 return x;
481}
482
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000483static int
484string_buffer_getreadbuf(self, index, ptr)
485 PyStringObject *self;
486 int index;
487 const void **ptr;
488{
489 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000490 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000491 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000492 return -1;
493 }
494 *ptr = (void *)self->ob_sval;
495 return self->ob_size;
496}
497
498static int
499string_buffer_getwritebuf(self, index, ptr)
500 PyStringObject *self;
501 int index;
502 const void **ptr;
503{
Guido van Rossum045e6881997-09-08 18:30:11 +0000504 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000505 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000506 return -1;
507}
508
509static int
510string_buffer_getsegcount(self, lenp)
511 PyStringObject *self;
512 int *lenp;
513{
514 if ( lenp )
515 *lenp = self->ob_size;
516 return 1;
517}
518
Guido van Rossum1db70701998-10-08 02:18:52 +0000519static int
520string_buffer_getcharbuf(self, index, ptr)
521 PyStringObject *self;
522 int index;
523 const char **ptr;
524{
525 if ( index != 0 ) {
526 PyErr_SetString(PyExc_SystemError,
527 "accessing non-existent string segment");
528 return -1;
529 }
530 *ptr = self->ob_sval;
531 return self->ob_size;
532}
533
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000534static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000535 (inquiry)string_length, /*sq_length*/
536 (binaryfunc)string_concat, /*sq_concat*/
537 (intargfunc)string_repeat, /*sq_repeat*/
538 (intargfunc)string_item, /*sq_item*/
539 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000540 0, /*sq_ass_item*/
541 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000542 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000543};
544
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000545static PyBufferProcs string_as_buffer = {
546 (getreadbufferproc)string_buffer_getreadbuf,
547 (getwritebufferproc)string_buffer_getwritebuf,
548 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000549 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000550};
551
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000552
553
/* Strip-mode selectors.  NOTE(review): the code using these lies
   outside this part of the file; presumably they choose which end(s)
   of a string a strip operation trims -- confirm against the users. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
557
558
559static PyObject *
560split_whitespace(s, len, maxsplit)
561 char *s;
562 int len;
563 int maxsplit;
564{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000565 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000566 PyObject* item;
567 PyObject *list = PyList_New(0);
568
569 if (list == NULL)
570 return NULL;
571
Guido van Rossum4c08d552000-03-10 22:55:18 +0000572 for (i = j = 0; i < len; ) {
573 while (i < len && isspace(Py_CHARMASK(s[i])))
574 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000575 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000576 while (i < len && !isspace(Py_CHARMASK(s[i])))
577 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000578 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000579 if (maxsplit-- <= 0)
580 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000581 item = PyString_FromStringAndSize(s+j, (int)(i-j));
582 if (item == NULL)
583 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000584 err = PyList_Append(list, item);
585 Py_DECREF(item);
586 if (err < 0)
587 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000588 while (i < len && isspace(Py_CHARMASK(s[i])))
589 i++;
590 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000591 }
592 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000593 if (j < len) {
594 item = PyString_FromStringAndSize(s+j, (int)(len - j));
595 if (item == NULL)
596 goto finally;
597 err = PyList_Append(list, item);
598 Py_DECREF(item);
599 if (err < 0)
600 goto finally;
601 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000602 return list;
603 finally:
604 Py_DECREF(list);
605 return NULL;
606}
607
608
609static char split__doc__[] =
610"S.split([sep [,maxsplit]]) -> list of strings\n\
611\n\
612Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000613delimiter string. If maxsplit is given, at most maxsplit\n\
614splits are done. If sep is not specified, any whitespace string\n\
615is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000616
617static PyObject *
618string_split(self, args)
619 PyStringObject *self;
620 PyObject *args;
621{
622 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000623 int maxsplit = -1;
624 const char *s = PyString_AS_STRING(self), *sub;
625 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000626
Guido van Rossum4c08d552000-03-10 22:55:18 +0000627 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000628 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000629 if (maxsplit < 0)
630 maxsplit = INT_MAX;
631 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000632 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000633 if (PyString_Check(subobj)) {
634 sub = PyString_AS_STRING(subobj);
635 n = PyString_GET_SIZE(subobj);
636 }
637 else if (PyUnicode_Check(subobj))
638 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
639 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
640 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000641 if (n == 0) {
642 PyErr_SetString(PyExc_ValueError, "empty separator");
643 return NULL;
644 }
645
646 list = PyList_New(0);
647 if (list == NULL)
648 return NULL;
649
650 i = j = 0;
651 while (i+n <= len) {
652 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000653 if (maxsplit-- <= 0)
654 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000655 item = PyString_FromStringAndSize(s+j, (int)(i-j));
656 if (item == NULL)
657 goto fail;
658 err = PyList_Append(list, item);
659 Py_DECREF(item);
660 if (err < 0)
661 goto fail;
662 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000663 }
664 else
665 i++;
666 }
667 item = PyString_FromStringAndSize(s+j, (int)(len-j));
668 if (item == NULL)
669 goto fail;
670 err = PyList_Append(list, item);
671 Py_DECREF(item);
672 if (err < 0)
673 goto fail;
674
675 return list;
676
677 fail:
678 Py_DECREF(list);
679 return NULL;
680}
681
682
683static char join__doc__[] =
684"S.join(sequence) -> string\n\
685\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000686Return a string which is the concatenation of the strings in the\n\
687sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688
689static PyObject *
690string_join(self, args)
691 PyStringObject *self;
692 PyObject *args;
693{
694 char *sep = PyString_AS_STRING(self);
695 int seplen = PyString_GET_SIZE(self);
696 PyObject *res = NULL;
697 int reslen = 0;
698 char *p;
699 int seqlen = 0;
700 int sz = 100;
701 int i, slen;
702 PyObject *seq;
703
Guido van Rossum43713e52000-02-29 13:59:29 +0000704 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000705 return NULL;
706
707 seqlen = PySequence_Length(seq);
708 if (seqlen < 0 && PyErr_Occurred())
709 return NULL;
710
711 if (seqlen == 1) {
712 /* Optimization if there's only one item */
713 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000714 if (item == NULL)
715 return NULL;
716 if (!PyString_Check(item) &&
717 !PyUnicode_Check(item)) {
718 PyErr_SetString(PyExc_TypeError,
719 "first argument must be sequence of strings");
720 Py_DECREF(item);
721 return NULL;
722 }
723 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000724 }
725 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
726 return NULL;
727 p = PyString_AsString(res);
728
729 /* optimize for lists. all others (tuples and arbitrary sequences)
730 * just use the abstract interface.
731 */
732 if (PyList_Check(seq)) {
733 for (i = 0; i < seqlen; i++) {
734 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000735 if (!PyString_Check(item)){
736 if (PyUnicode_Check(item)) {
737 Py_DECREF(res);
738 return PyUnicode_Join(
739 (PyObject *)self,
740 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000741 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000742 PyErr_Format(PyExc_TypeError,
743 "sequence item %i not a string",
744 i);
745 goto finally;
746 }
747 slen = PyString_GET_SIZE(item);
748 while (reslen + slen + seplen >= sz) {
749 if (_PyString_Resize(&res, sz*2))
750 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000751 sz *= 2;
752 p = PyString_AsString(res) + reslen;
753 }
754 if (i > 0) {
755 memcpy(p, sep, seplen);
756 p += seplen;
757 reslen += seplen;
758 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000759 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000760 p += slen;
761 reslen += slen;
762 }
763 }
764 else {
765 for (i = 0; i < seqlen; i++) {
766 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000767 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000768 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000769 if (!PyString_Check(item)){
770 if (PyUnicode_Check(item)) {
771 Py_DECREF(res);
772 Py_DECREF(item);
773 return PyUnicode_Join(
774 (PyObject *)self,
775 seq);
776 }
777 Py_DECREF(item);
778 PyErr_Format(PyExc_TypeError,
779 "sequence item %i not a string",
780 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000781 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000782 }
783 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000784 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000785 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000786 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000787 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000788 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 sz *= 2;
790 p = PyString_AsString(res) + reslen;
791 }
792 if (i > 0) {
793 memcpy(p, sep, seplen);
794 p += seplen;
795 reslen += seplen;
796 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000797 memcpy(p, PyString_AS_STRING(item), slen);
798 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000799 p += slen;
800 reslen += slen;
801 }
802 }
803 if (_PyString_Resize(&res, reslen))
804 goto finally;
805 return res;
806
807 finally:
808 Py_DECREF(res);
809 return NULL;
810}
811
812
813
814static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000815string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000816 PyStringObject *self;
817 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000818 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000819{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000820 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821 int len = PyString_GET_SIZE(self);
822 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000823 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000824
Guido van Rossumc6821402000-05-08 14:08:05 +0000825 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
826 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000827 return -2;
828 if (PyString_Check(subobj)) {
829 sub = PyString_AS_STRING(subobj);
830 n = PyString_GET_SIZE(subobj);
831 }
832 else if (PyUnicode_Check(subobj))
833 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
834 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000835 return -2;
836
837 if (last > len)
838 last = len;
839 if (last < 0)
840 last += len;
841 if (last < 0)
842 last = 0;
843 if (i < 0)
844 i += len;
845 if (i < 0)
846 i = 0;
847
Guido van Rossum4c08d552000-03-10 22:55:18 +0000848 if (dir > 0) {
849 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000850 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000851 last -= n;
852 for (; i <= last; ++i)
853 if (s[i] == sub[0] &&
854 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
855 return (long)i;
856 }
857 else {
858 int j;
859
860 if (n == 0 && i <= last)
861 return (long)last;
862 for (j = last-n; j >= i; --j)
863 if (s[j] == sub[0] &&
864 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
865 return (long)j;
866 }
867
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868 return -1;
869}
870
871
872static char find__doc__[] =
873"S.find(sub [,start [,end]]) -> int\n\
874\n\
875Return the lowest index in S where substring sub is found,\n\
876such that sub is contained within s[start,end]. Optional\n\
877arguments start and end are interpreted as in slice notation.\n\
878\n\
879Return -1 on failure.";
880
881static PyObject *
882string_find(self, args)
883 PyStringObject *self;
884 PyObject *args;
885{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000886 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000887 if (result == -2)
888 return NULL;
889 return PyInt_FromLong(result);
890}
891
892
893static char index__doc__[] =
894"S.index(sub [,start [,end]]) -> int\n\
895\n\
896Like S.find() but raise ValueError when the substring is not found.";
897
898static PyObject *
899string_index(self, args)
900 PyStringObject *self;
901 PyObject *args;
902{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000903 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000904 if (result == -2)
905 return NULL;
906 if (result == -1) {
907 PyErr_SetString(PyExc_ValueError,
908 "substring not found in string.index");
909 return NULL;
910 }
911 return PyInt_FromLong(result);
912}
913
914
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000915static char rfind__doc__[] =
916"S.rfind(sub [,start [,end]]) -> int\n\
917\n\
918Return the highest index in S where substring sub is found,\n\
919such that sub is contained within s[start,end]. Optional\n\
920arguments start and end are interpreted as in slice notation.\n\
921\n\
922Return -1 on failure.";
923
924static PyObject *
925string_rfind(self, args)
926 PyStringObject *self;
927 PyObject *args;
928{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000929 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000930 if (result == -2)
931 return NULL;
932 return PyInt_FromLong(result);
933}
934
935
936static char rindex__doc__[] =
937"S.rindex(sub [,start [,end]]) -> int\n\
938\n\
939Like S.rfind() but raise ValueError when the substring is not found.";
940
941static PyObject *
942string_rindex(self, args)
943 PyStringObject *self;
944 PyObject *args;
945{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000946 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000947 if (result == -2)
948 return NULL;
949 if (result == -1) {
950 PyErr_SetString(PyExc_ValueError,
951 "substring not found in string.rindex");
952 return NULL;
953 }
954 return PyInt_FromLong(result);
955}
956
957
958static PyObject *
959do_strip(self, args, striptype)
960 PyStringObject *self;
961 PyObject *args;
962 int striptype;
963{
964 char *s = PyString_AS_STRING(self);
965 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000966
Guido van Rossum43713e52000-02-29 13:59:29 +0000967 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000968 return NULL;
969
970 i = 0;
971 if (striptype != RIGHTSTRIP) {
972 while (i < len && isspace(Py_CHARMASK(s[i]))) {
973 i++;
974 }
975 }
976
977 j = len;
978 if (striptype != LEFTSTRIP) {
979 do {
980 j--;
981 } while (j >= i && isspace(Py_CHARMASK(s[j])));
982 j++;
983 }
984
985 if (i == 0 && j == len) {
986 Py_INCREF(self);
987 return (PyObject*)self;
988 }
989 else
990 return PyString_FromStringAndSize(s+i, j-i);
991}
992
993
994static char strip__doc__[] =
995"S.strip() -> string\n\
996\n\
997Return a copy of the string S with leading and trailing\n\
998whitespace removed.";
999
1000static PyObject *
1001string_strip(self, args)
1002 PyStringObject *self;
1003 PyObject *args;
1004{
1005 return do_strip(self, args, BOTHSTRIP);
1006}
1007
1008
1009static char lstrip__doc__[] =
1010"S.lstrip() -> string\n\
1011\n\
1012Return a copy of the string S with leading whitespace removed.";
1013
1014static PyObject *
1015string_lstrip(self, args)
1016 PyStringObject *self;
1017 PyObject *args;
1018{
1019 return do_strip(self, args, LEFTSTRIP);
1020}
1021
1022
1023static char rstrip__doc__[] =
1024"S.rstrip() -> string\n\
1025\n\
1026Return a copy of the string S with trailing whitespace removed.";
1027
1028static PyObject *
1029string_rstrip(self, args)
1030 PyStringObject *self;
1031 PyObject *args;
1032{
1033 return do_strip(self, args, RIGHTSTRIP);
1034}
1035
1036
1037static char lower__doc__[] =
1038"S.lower() -> string\n\
1039\n\
1040Return a copy of the string S converted to lowercase.";
1041
1042static PyObject *
1043string_lower(self, args)
1044 PyStringObject *self;
1045 PyObject *args;
1046{
1047 char *s = PyString_AS_STRING(self), *s_new;
1048 int i, n = PyString_GET_SIZE(self);
1049 PyObject *new;
1050
Guido van Rossum43713e52000-02-29 13:59:29 +00001051 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 return NULL;
1053 new = PyString_FromStringAndSize(NULL, n);
1054 if (new == NULL)
1055 return NULL;
1056 s_new = PyString_AsString(new);
1057 for (i = 0; i < n; i++) {
1058 int c = Py_CHARMASK(*s++);
1059 if (isupper(c)) {
1060 *s_new = tolower(c);
1061 } else
1062 *s_new = c;
1063 s_new++;
1064 }
1065 return new;
1066}
1067
1068
1069static char upper__doc__[] =
1070"S.upper() -> string\n\
1071\n\
1072Return a copy of the string S converted to uppercase.";
1073
1074static PyObject *
1075string_upper(self, args)
1076 PyStringObject *self;
1077 PyObject *args;
1078{
1079 char *s = PyString_AS_STRING(self), *s_new;
1080 int i, n = PyString_GET_SIZE(self);
1081 PyObject *new;
1082
Guido van Rossum43713e52000-02-29 13:59:29 +00001083 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001084 return NULL;
1085 new = PyString_FromStringAndSize(NULL, n);
1086 if (new == NULL)
1087 return NULL;
1088 s_new = PyString_AsString(new);
1089 for (i = 0; i < n; i++) {
1090 int c = Py_CHARMASK(*s++);
1091 if (islower(c)) {
1092 *s_new = toupper(c);
1093 } else
1094 *s_new = c;
1095 s_new++;
1096 }
1097 return new;
1098}
1099
1100
Guido van Rossum4c08d552000-03-10 22:55:18 +00001101static char title__doc__[] =
1102"S.title() -> string\n\
1103\n\
1104Return a titlecased version of S, i.e. words start with uppercase\n\
1105characters, all remaining cased characters have lowercase.";
1106
1107static PyObject*
1108string_title(PyUnicodeObject *self, PyObject *args)
1109{
1110 char *s = PyString_AS_STRING(self), *s_new;
1111 int i, n = PyString_GET_SIZE(self);
1112 int previous_is_cased = 0;
1113 PyObject *new;
1114
1115 if (!PyArg_ParseTuple(args, ":title"))
1116 return NULL;
1117 new = PyString_FromStringAndSize(NULL, n);
1118 if (new == NULL)
1119 return NULL;
1120 s_new = PyString_AsString(new);
1121 for (i = 0; i < n; i++) {
1122 int c = Py_CHARMASK(*s++);
1123 if (islower(c)) {
1124 if (!previous_is_cased)
1125 c = toupper(c);
1126 previous_is_cased = 1;
1127 } else if (isupper(c)) {
1128 if (previous_is_cased)
1129 c = tolower(c);
1130 previous_is_cased = 1;
1131 } else
1132 previous_is_cased = 0;
1133 *s_new++ = c;
1134 }
1135 return new;
1136}
1137
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138static char capitalize__doc__[] =
1139"S.capitalize() -> string\n\
1140\n\
1141Return a copy of the string S with only its first character\n\
1142capitalized.";
1143
1144static PyObject *
1145string_capitalize(self, args)
1146 PyStringObject *self;
1147 PyObject *args;
1148{
1149 char *s = PyString_AS_STRING(self), *s_new;
1150 int i, n = PyString_GET_SIZE(self);
1151 PyObject *new;
1152
Guido van Rossum43713e52000-02-29 13:59:29 +00001153 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154 return NULL;
1155 new = PyString_FromStringAndSize(NULL, n);
1156 if (new == NULL)
1157 return NULL;
1158 s_new = PyString_AsString(new);
1159 if (0 < n) {
1160 int c = Py_CHARMASK(*s++);
1161 if (islower(c))
1162 *s_new = toupper(c);
1163 else
1164 *s_new = c;
1165 s_new++;
1166 }
1167 for (i = 1; i < n; i++) {
1168 int c = Py_CHARMASK(*s++);
1169 if (isupper(c))
1170 *s_new = tolower(c);
1171 else
1172 *s_new = c;
1173 s_new++;
1174 }
1175 return new;
1176}
1177
1178
1179static char count__doc__[] =
1180"S.count(sub[, start[, end]]) -> int\n\
1181\n\
1182Return the number of occurrences of substring sub in string\n\
1183S[start:end]. Optional arguments start and end are\n\
1184interpreted as in slice notation.";
1185
1186static PyObject *
1187string_count(self, args)
1188 PyStringObject *self;
1189 PyObject *args;
1190{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001191 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001192 int len = PyString_GET_SIZE(self), n;
1193 int i = 0, last = INT_MAX;
1194 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001195 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001196
Guido van Rossumc6821402000-05-08 14:08:05 +00001197 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1198 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001199 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001200
Guido van Rossum4c08d552000-03-10 22:55:18 +00001201 if (PyString_Check(subobj)) {
1202 sub = PyString_AS_STRING(subobj);
1203 n = PyString_GET_SIZE(subobj);
1204 }
1205 else if (PyUnicode_Check(subobj))
1206 return PyInt_FromLong(
1207 PyUnicode_Count((PyObject *)self, subobj, i, last));
1208 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1209 return NULL;
1210
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001211 if (last > len)
1212 last = len;
1213 if (last < 0)
1214 last += len;
1215 if (last < 0)
1216 last = 0;
1217 if (i < 0)
1218 i += len;
1219 if (i < 0)
1220 i = 0;
1221 m = last + 1 - n;
1222 if (n == 0)
1223 return PyInt_FromLong((long) (m-i));
1224
1225 r = 0;
1226 while (i < m) {
1227 if (!memcmp(s+i, sub, n)) {
1228 r++;
1229 i += n;
1230 } else {
1231 i++;
1232 }
1233 }
1234 return PyInt_FromLong((long) r);
1235}
1236
1237
1238static char swapcase__doc__[] =
1239"S.swapcase() -> string\n\
1240\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001241Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242converted to lowercase and vice versa.";
1243
1244static PyObject *
1245string_swapcase(self, args)
1246 PyStringObject *self;
1247 PyObject *args;
1248{
1249 char *s = PyString_AS_STRING(self), *s_new;
1250 int i, n = PyString_GET_SIZE(self);
1251 PyObject *new;
1252
Guido van Rossum43713e52000-02-29 13:59:29 +00001253 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001254 return NULL;
1255 new = PyString_FromStringAndSize(NULL, n);
1256 if (new == NULL)
1257 return NULL;
1258 s_new = PyString_AsString(new);
1259 for (i = 0; i < n; i++) {
1260 int c = Py_CHARMASK(*s++);
1261 if (islower(c)) {
1262 *s_new = toupper(c);
1263 }
1264 else if (isupper(c)) {
1265 *s_new = tolower(c);
1266 }
1267 else
1268 *s_new = c;
1269 s_new++;
1270 }
1271 return new;
1272}
1273
1274
1275static char translate__doc__[] =
1276"S.translate(table [,deletechars]) -> string\n\
1277\n\
1278Return a copy of the string S, where all characters occurring\n\
1279in the optional argument deletechars are removed, and the\n\
1280remaining characters have been mapped through the given\n\
1281translation table, which must be a string of length 256.";
1282
1283static PyObject *
1284string_translate(self, args)
1285 PyStringObject *self;
1286 PyObject *args;
1287{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001288 register char *input, *output;
1289 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 register int i, c, changed = 0;
1291 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293 int inlen, tablen, dellen = 0;
1294 PyObject *result;
1295 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001297
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 if (!PyArg_ParseTuple(args, "O|O:translate",
1299 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301
1302 if (PyString_Check(tableobj)) {
1303 table1 = PyString_AS_STRING(tableobj);
1304 tablen = PyString_GET_SIZE(tableobj);
1305 }
1306 else if (PyUnicode_Check(tableobj)) {
1307 /* Unicode .translate() does not support the deletechars
1308 parameter; instead a mapping to None will cause characters
1309 to be deleted. */
1310 if (delobj != NULL) {
1311 PyErr_SetString(PyExc_TypeError,
1312 "deletions are implemented differently for unicode");
1313 return NULL;
1314 }
1315 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1316 }
1317 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319
1320 if (delobj != NULL) {
1321 if (PyString_Check(delobj)) {
1322 del_table = PyString_AS_STRING(delobj);
1323 dellen = PyString_GET_SIZE(delobj);
1324 }
1325 else if (PyUnicode_Check(delobj)) {
1326 PyErr_SetString(PyExc_TypeError,
1327 "deletions are implemented differently for unicode");
1328 return NULL;
1329 }
1330 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1331 return NULL;
1332
1333 if (tablen != 256) {
1334 PyErr_SetString(PyExc_ValueError,
1335 "translation table must be 256 characters long");
1336 return NULL;
1337 }
1338 }
1339 else {
1340 del_table = NULL;
1341 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342 }
1343
1344 table = table1;
1345 inlen = PyString_Size(input_obj);
1346 result = PyString_FromStringAndSize((char *)NULL, inlen);
1347 if (result == NULL)
1348 return NULL;
1349 output_start = output = PyString_AsString(result);
1350 input = PyString_AsString(input_obj);
1351
1352 if (dellen == 0) {
1353 /* If no deletions are required, use faster code */
1354 for (i = inlen; --i >= 0; ) {
1355 c = Py_CHARMASK(*input++);
1356 if (Py_CHARMASK((*output++ = table[c])) != c)
1357 changed = 1;
1358 }
1359 if (changed)
1360 return result;
1361 Py_DECREF(result);
1362 Py_INCREF(input_obj);
1363 return input_obj;
1364 }
1365
1366 for (i = 0; i < 256; i++)
1367 trans_table[i] = Py_CHARMASK(table[i]);
1368
1369 for (i = 0; i < dellen; i++)
1370 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1371
1372 for (i = inlen; --i >= 0; ) {
1373 c = Py_CHARMASK(*input++);
1374 if (trans_table[c] != -1)
1375 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1376 continue;
1377 changed = 1;
1378 }
1379 if (!changed) {
1380 Py_DECREF(result);
1381 Py_INCREF(input_obj);
1382 return input_obj;
1383 }
1384 /* Fix the size of the resulting string */
1385 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1386 return NULL;
1387 return result;
1388}
1389
1390
1391/* What follows is used for implementing replace(). Perry Stoll. */
1392
/*
  mymemfind

  memory-block counterpart of strstr.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of the match, or -1 when
  there is none.  A PAT longer than MEM is never found, so the result
  is -1 in that case too.
*/
static int
mymemfind(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	/* highest index at which a complete copy of PAT still fits */
	int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* cheap first-byte test before comparing the rest */
		if (mem[pos] == pat[0]) {
			if (pat_len == 1)
				return pos;
			if (memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
				return pos;
		}
		pos++;
	}
	return -1;
}
1424
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   the left and resuming right after each match.
   For example mem=1111 with pat==11 gives 2,
   and mem=11111 with pat==11 gives 2 as well.
 */
static int
mymemcnt(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	int hits = 0;
	int where;

	while (len >= 0 &&
	       (where = mymemfind(mem, len, pat, pat_len)) != -1) {
		/* continue just past the match that was found */
		int consumed = where + pat_len;

		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
1452
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means no limit.

   If STR is empty, if PAT is longer than STR, or if there are no
   occurrences of PAT in STR (or COUNT is 0), then the original string
   is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller releases it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or result too long).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
	char *str;
	int len;     /* input string */
	char *pat;
	int pat_len; /* pattern string to find */
	char *sub;
	int sub_len; /* substitution string */
	int count;   /* number of replacements */
	int *out_len;

{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* refuse a result whose length would overflow an int instead of
	   allocating a too-small buffer and writing past its end */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* Always request at least one byte: a zero-size request may
	   legitimately yield NULL, which the caller would mistake for an
	   out-of-memory error when the result is the empty string. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1535
1536
1537static char replace__doc__[] =
1538"S.replace (old, new[, maxsplit]) -> string\n\
1539\n\
1540Return a copy of string S with all occurrences of substring\n\
1541old replaced by new. If the optional argument maxsplit is\n\
1542given, only the first maxsplit occurrences are replaced.";
1543
1544static PyObject *
1545string_replace(self, args)
1546 PyStringObject *self;
1547 PyObject *args;
1548{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 const char *str = PyString_AS_STRING(self), *sub, *repl;
1550 char *new_s;
1551 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1552 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555
Guido van Rossum4c08d552000-03-10 22:55:18 +00001556 if (!PyArg_ParseTuple(args, "OO|i:replace",
1557 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001558 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001559
1560 if (PyString_Check(subobj)) {
1561 sub = PyString_AS_STRING(subobj);
1562 sub_len = PyString_GET_SIZE(subobj);
1563 }
1564 else if (PyUnicode_Check(subobj))
1565 return PyUnicode_Replace((PyObject *)self,
1566 subobj, replobj, count);
1567 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1568 return NULL;
1569
1570 if (PyString_Check(replobj)) {
1571 repl = PyString_AS_STRING(replobj);
1572 repl_len = PyString_GET_SIZE(replobj);
1573 }
1574 else if (PyUnicode_Check(replobj))
1575 return PyUnicode_Replace((PyObject *)self,
1576 subobj, replobj, count);
1577 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1578 return NULL;
1579
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001580 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001581 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582 return NULL;
1583 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 if (new_s == NULL) {
1586 PyErr_NoMemory();
1587 return NULL;
1588 }
1589 if (out_len == -1) {
1590 /* we're returning another reference to self */
1591 new = (PyObject*)self;
1592 Py_INCREF(new);
1593 }
1594 else {
1595 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001596 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597 }
1598 return new;
1599}
1600
1601
1602static char startswith__doc__[] =
1603"S.startswith(prefix[, start[, end]]) -> int\n\
1604\n\
1605Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1606optional start, test S beginning at that position. With optional end, stop\n\
1607comparing S at that position.";
1608
1609static PyObject *
1610string_startswith(self, args)
1611 PyStringObject *self;
1612 PyObject *args;
1613{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 int plen;
1618 int start = 0;
1619 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001620 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621
Guido van Rossumc6821402000-05-08 14:08:05 +00001622 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1623 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001624 return NULL;
1625 if (PyString_Check(subobj)) {
1626 prefix = PyString_AS_STRING(subobj);
1627 plen = PyString_GET_SIZE(subobj);
1628 }
1629 else if (PyUnicode_Check(subobj))
1630 return PyInt_FromLong(
1631 PyUnicode_Tailmatch((PyObject *)self,
1632 subobj, start, end, -1));
1633 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634 return NULL;
1635
1636 /* adopt Java semantics for index out of range. it is legal for
1637 * offset to be == plen, but this only returns true if prefix is
1638 * the empty string.
1639 */
1640 if (start < 0 || start+plen > len)
1641 return PyInt_FromLong(0);
1642
1643 if (!memcmp(str+start, prefix, plen)) {
1644 /* did the match end after the specified end? */
1645 if (end < 0)
1646 return PyInt_FromLong(1);
1647 else if (end - start < plen)
1648 return PyInt_FromLong(0);
1649 else
1650 return PyInt_FromLong(1);
1651 }
1652 else return PyInt_FromLong(0);
1653}
1654
1655
1656static char endswith__doc__[] =
1657"S.endswith(suffix[, start[, end]]) -> int\n\
1658\n\
1659Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1660optional start, test S beginning at that position. With optional end, stop\n\
1661comparing S at that position.";
1662
1663static PyObject *
1664string_endswith(self, args)
1665 PyStringObject *self;
1666 PyObject *args;
1667{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001670 const char* suffix;
1671 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672 int start = 0;
1673 int end = -1;
1674 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001675 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001676
Guido van Rossumc6821402000-05-08 14:08:05 +00001677 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1678 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 return NULL;
1680 if (PyString_Check(subobj)) {
1681 suffix = PyString_AS_STRING(subobj);
1682 slen = PyString_GET_SIZE(subobj);
1683 }
1684 else if (PyUnicode_Check(subobj))
1685 return PyInt_FromLong(
1686 PyUnicode_Tailmatch((PyObject *)self,
1687 subobj, start, end, +1));
1688 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689 return NULL;
1690
Guido van Rossum4c08d552000-03-10 22:55:18 +00001691 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692 return PyInt_FromLong(0);
1693
1694 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001695 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696
Guido van Rossum4c08d552000-03-10 22:55:18 +00001697 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001698 return PyInt_FromLong(1);
1699 else return PyInt_FromLong(0);
1700}
1701
1702
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703static char expandtabs__doc__[] =
1704"S.expandtabs([tabsize]) -> string\n\
1705\n\
1706Return a copy of S where all tab characters are expanded using spaces.\n\
1707If tabsize is not given, a tab size of 8 characters is assumed.";
1708
1709static PyObject*
1710string_expandtabs(PyStringObject *self, PyObject *args)
1711{
1712 const char *e, *p;
1713 char *q;
1714 int i, j;
1715 PyObject *u;
1716 int tabsize = 8;
1717
1718 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1719 return NULL;
1720
1721 /* First pass: determine size of ouput string */
1722 i = j = 0;
1723 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1724 for (p = PyString_AS_STRING(self); p < e; p++)
1725 if (*p == '\t') {
1726 if (tabsize > 0)
1727 j += tabsize - (j % tabsize);
1728 }
1729 else {
1730 j++;
1731 if (*p == '\n' || *p == '\r') {
1732 i += j;
1733 j = 0;
1734 }
1735 }
1736
1737 /* Second pass: create output string and fill it */
1738 u = PyString_FromStringAndSize(NULL, i + j);
1739 if (!u)
1740 return NULL;
1741
1742 j = 0;
1743 q = PyString_AS_STRING(u);
1744
1745 for (p = PyString_AS_STRING(self); p < e; p++)
1746 if (*p == '\t') {
1747 if (tabsize > 0) {
1748 i = tabsize - (j % tabsize);
1749 j += i;
1750 while (i--)
1751 *q++ = ' ';
1752 }
1753 }
1754 else {
1755 j++;
1756 *q++ = *p;
1757 if (*p == '\n' || *p == '\r')
1758 j = 0;
1759 }
1760
1761 return u;
1762}
1763
1764static
1765PyObject *pad(PyStringObject *self,
1766 int left,
1767 int right,
1768 char fill)
1769{
1770 PyObject *u;
1771
1772 if (left < 0)
1773 left = 0;
1774 if (right < 0)
1775 right = 0;
1776
1777 if (left == 0 && right == 0) {
1778 Py_INCREF(self);
1779 return (PyObject *)self;
1780 }
1781
1782 u = PyString_FromStringAndSize(NULL,
1783 left + PyString_GET_SIZE(self) + right);
1784 if (u) {
1785 if (left)
1786 memset(PyString_AS_STRING(u), fill, left);
1787 memcpy(PyString_AS_STRING(u) + left,
1788 PyString_AS_STRING(self),
1789 PyString_GET_SIZE(self));
1790 if (right)
1791 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1792 fill, right);
1793 }
1794
1795 return u;
1796}
1797
1798static char ljust__doc__[] =
1799"S.ljust(width) -> string\n\
1800\n\
1801Return S left justified in a string of length width. Padding is\n\
1802done using spaces.";
1803
1804static PyObject *
1805string_ljust(PyStringObject *self, PyObject *args)
1806{
1807 int width;
1808 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1809 return NULL;
1810
1811 if (PyString_GET_SIZE(self) >= width) {
1812 Py_INCREF(self);
1813 return (PyObject*) self;
1814 }
1815
1816 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1817}
1818
1819
1820static char rjust__doc__[] =
1821"S.rjust(width) -> string\n\
1822\n\
1823Return S right justified in a string of length width. Padding is\n\
1824done using spaces.";
1825
1826static PyObject *
1827string_rjust(PyStringObject *self, PyObject *args)
1828{
1829 int width;
1830 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1831 return NULL;
1832
1833 if (PyString_GET_SIZE(self) >= width) {
1834 Py_INCREF(self);
1835 return (PyObject*) self;
1836 }
1837
1838 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1839}
1840
1841
1842static char center__doc__[] =
1843"S.center(width) -> string\n\
1844\n\
1845Return S centered in a string of length width. Padding is done\n\
1846using spaces.";
1847
1848static PyObject *
1849string_center(PyStringObject *self, PyObject *args)
1850{
1851 int marg, left;
1852 int width;
1853
1854 if (!PyArg_ParseTuple(args, "i:center", &width))
1855 return NULL;
1856
1857 if (PyString_GET_SIZE(self) >= width) {
1858 Py_INCREF(self);
1859 return (PyObject*) self;
1860 }
1861
1862 marg = width - PyString_GET_SIZE(self);
1863 left = marg / 2 + (marg & width & 1);
1864
1865 return pad(self, left, marg - left, ' ');
1866}
1867
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width) -- currently compiled out by the surrounding #if 0.
   Left-pads with '0' up to `width`; a leading '+' or '-' of the original
   string is moved in front of the inserted zeros.  Strings already at
   least `width` long are returned unchanged. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    PyObject *result;
    char *chars;
    int width;
    int zeros;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    zeros = width - PyString_GET_SIZE(self);

    result = pad(self, zeros, 0, '0');
    if (result == NULL)
        return NULL;

    /* chars[zeros] is the first character of the original string */
    chars = PyString_AS_STRING(result);
    if (chars[zeros] == '+' || chars[zeros] == '-') {
        /* move sign to beginning of string */
        chars[0] = chars[zeros];
        chars[zeros] = '0';
    }

    return result;
}
#endif
1907
1908static char isspace__doc__[] =
1909"S.isspace() -> int\n\
1910\n\
1911Return 1 if there are only whitespace characters in S,\n\
19120 otherwise.";
1913
1914static PyObject*
1915string_isspace(PyStringObject *self, PyObject *args)
1916{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001917 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1918 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001919
1920 if (!PyArg_NoArgs(args))
1921 return NULL;
1922
1923 /* Shortcut for single character strings */
1924 if (PyString_GET_SIZE(self) == 1 &&
1925 isspace(*p))
1926 return PyInt_FromLong(1);
1927
1928 e = p + PyString_GET_SIZE(self);
1929 for (; p < e; p++) {
1930 if (!isspace(*p))
1931 return PyInt_FromLong(0);
1932 }
1933 return PyInt_FromLong(1);
1934}
1935
1936
1937static char isdigit__doc__[] =
1938"S.isdigit() -> int\n\
1939\n\
1940Return 1 if there are only digit characters in S,\n\
19410 otherwise.";
1942
1943static PyObject*
1944string_isdigit(PyStringObject *self, PyObject *args)
1945{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001946 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1947 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001948
1949 if (!PyArg_NoArgs(args))
1950 return NULL;
1951
1952 /* Shortcut for single character strings */
1953 if (PyString_GET_SIZE(self) == 1 &&
1954 isdigit(*p))
1955 return PyInt_FromLong(1);
1956
1957 e = p + PyString_GET_SIZE(self);
1958 for (; p < e; p++) {
1959 if (!isdigit(*p))
1960 return PyInt_FromLong(0);
1961 }
1962 return PyInt_FromLong(1);
1963}
1964
1965
1966static char islower__doc__[] =
1967"S.islower() -> int\n\
1968\n\
1969Return 1 if all cased characters in S are lowercase and there is\n\
1970at least one cased character in S, 0 otherwise.";
1971
1972static PyObject*
1973string_islower(PyStringObject *self, PyObject *args)
1974{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001975 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1976 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001977 int cased;
1978
1979 if (!PyArg_NoArgs(args))
1980 return NULL;
1981
1982 /* Shortcut for single character strings */
1983 if (PyString_GET_SIZE(self) == 1)
1984 return PyInt_FromLong(islower(*p) != 0);
1985
1986 e = p + PyString_GET_SIZE(self);
1987 cased = 0;
1988 for (; p < e; p++) {
1989 if (isupper(*p))
1990 return PyInt_FromLong(0);
1991 else if (!cased && islower(*p))
1992 cased = 1;
1993 }
1994 return PyInt_FromLong(cased);
1995}
1996
1997
1998static char isupper__doc__[] =
1999"S.isupper() -> int\n\
2000\n\
2001Return 1 if all cased characters in S are uppercase and there is\n\
2002at least one cased character in S, 0 otherwise.";
2003
2004static PyObject*
2005string_isupper(PyStringObject *self, PyObject *args)
2006{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002007 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2008 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002009 int cased;
2010
2011 if (!PyArg_NoArgs(args))
2012 return NULL;
2013
2014 /* Shortcut for single character strings */
2015 if (PyString_GET_SIZE(self) == 1)
2016 return PyInt_FromLong(isupper(*p) != 0);
2017
2018 e = p + PyString_GET_SIZE(self);
2019 cased = 0;
2020 for (; p < e; p++) {
2021 if (islower(*p))
2022 return PyInt_FromLong(0);
2023 else if (!cased && isupper(*p))
2024 cased = 1;
2025 }
2026 return PyInt_FromLong(cased);
2027}
2028
2029
2030static char istitle__doc__[] =
2031"S.istitle() -> int\n\
2032\n\
2033Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2034may only follow uncased characters and lowercase characters only cased\n\
2035ones. Return 0 otherwise.";
2036
2037static PyObject*
2038string_istitle(PyStringObject *self, PyObject *args)
2039{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002040 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2041 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002042 int cased, previous_is_cased;
2043
2044 if (!PyArg_NoArgs(args))
2045 return NULL;
2046
2047 /* Shortcut for single character strings */
2048 if (PyString_GET_SIZE(self) == 1)
2049 return PyInt_FromLong(isupper(*p) != 0);
2050
2051 e = p + PyString_GET_SIZE(self);
2052 cased = 0;
2053 previous_is_cased = 0;
2054 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002055 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002056
2057 if (isupper(ch)) {
2058 if (previous_is_cased)
2059 return PyInt_FromLong(0);
2060 previous_is_cased = 1;
2061 cased = 1;
2062 }
2063 else if (islower(ch)) {
2064 if (!previous_is_cased)
2065 return PyInt_FromLong(0);
2066 previous_is_cased = 1;
2067 cased = 1;
2068 }
2069 else
2070 previous_is_cased = 0;
2071 }
2072 return PyInt_FromLong(cased);
2073}
2074
2075
2076static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002077"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078\n\
2079Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002080Line breaks are not included in the resulting list unless keepends\n\
2081is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002082
2083#define SPLIT_APPEND(data, left, right) \
2084 str = PyString_FromStringAndSize(data + left, right - left); \
2085 if (!str) \
2086 goto onError; \
2087 if (PyList_Append(list, str)) { \
2088 Py_DECREF(str); \
2089 goto onError; \
2090 } \
2091 else \
2092 Py_DECREF(str);
2093
2094static PyObject*
2095string_splitlines(PyStringObject *self, PyObject *args)
2096{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002097 register int i;
2098 register int j;
2099 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002100 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101 PyObject *list;
2102 PyObject *str;
2103 char *data;
2104
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002105 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106 return NULL;
2107
2108 data = PyString_AS_STRING(self);
2109 len = PyString_GET_SIZE(self);
2110
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 list = PyList_New(0);
2112 if (!list)
2113 goto onError;
2114
2115 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002116 int eol;
2117
Guido van Rossum4c08d552000-03-10 22:55:18 +00002118 /* Find a line and append it */
2119 while (i < len && data[i] != '\n' && data[i] != '\r')
2120 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002121
2122 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002123 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002124 if (i < len) {
2125 if (data[i] == '\r' && i + 1 < len &&
2126 data[i+1] == '\n')
2127 i += 2;
2128 else
2129 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002130 if (keepends)
2131 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002132 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002133 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134 j = i;
2135 }
2136 if (j < len) {
2137 SPLIT_APPEND(data, j, len);
2138 }
2139
2140 return list;
2141
2142 onError:
2143 Py_DECREF(list);
2144 return NULL;
2145}
2146
2147#undef SPLIT_APPEND
2148
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149
2150static PyMethodDef
2151string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002152 /* Counterparts of the obsolete stropmodule functions; except
2153 string.maketrans(). */
2154 {"join", (PyCFunction)string_join, 1, join__doc__},
2155 {"split", (PyCFunction)string_split, 1, split__doc__},
2156 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2157 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2158 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2159 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2160 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2161 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2162 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2164 {"count", (PyCFunction)string_count, 1, count__doc__},
2165 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2166 {"find", (PyCFunction)string_find, 1, find__doc__},
2167 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2170 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2171 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2172 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2174 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2175 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2177 {"title", (PyCFunction)string_title, 1, title__doc__},
2178 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2179 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2180 {"center", (PyCFunction)string_center, 1, center__doc__},
2181 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2182 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2183#if 0
2184 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2185#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186 {NULL, NULL} /* sentinel */
2187};
2188
2189static PyObject *
2190string_getattr(s, name)
2191 PyStringObject *s;
2192 char *name;
2193{
2194 return Py_FindMethod(string_methods, (PyObject*)s, name);
2195}
2196
2197
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002198PyTypeObject PyString_Type = {
2199 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002200 0,
2201 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002202 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002203 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002204 (destructor)string_dealloc, /*tp_dealloc*/
2205 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002207 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002208 (cmpfunc)string_compare, /*tp_compare*/
2209 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002210 0, /*tp_as_number*/
2211 &string_as_sequence, /*tp_as_sequence*/
2212 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002213 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002214 0, /*tp_call*/
2215 0, /*tp_str*/
2216 0, /*tp_getattro*/
2217 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002218 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002219 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002220 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002221};
2222
2223void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002224PyString_Concat(pv, w)
2225 register PyObject **pv;
2226 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002227{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002228 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002229 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002230 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002231 if (w == NULL || !PyString_Check(*pv)) {
2232 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002233 *pv = NULL;
2234 return;
2235 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002236 v = string_concat((PyStringObject *) *pv, w);
2237 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002238 *pv = v;
2239}
2240
Guido van Rossum013142a1994-08-30 08:19:36 +00002241void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002242PyString_ConcatAndDel(pv, w)
2243 register PyObject **pv;
2244 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002245{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002246 PyString_Concat(pv, w);
2247 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002248}
2249
2250
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002251/* The following function breaks the notion that strings are immutable:
2252 it changes the size of a string. We get away with this only if there
2253 is only one module referencing the object. You can also think of it
2254 as creating a new string object and destroying the old one, only
2255 more efficiently. In any case, don't use this if the string may
2256 already be known to some other part of the code... */
2257
2258int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002259_PyString_Resize(pv, newsize)
2260 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002261 int newsize;
2262{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002263 register PyObject *v;
2264 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002265 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002266 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002267 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002268 Py_DECREF(v);
2269 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002270 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002271 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002272 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002273#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002274 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002275#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002276 _Py_ForgetReference(v);
2277 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002278 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002279 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002280 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002281 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002282 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002283 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002284 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002285 _Py_NewReference(*pv);
2286 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002287 sv->ob_size = newsize;
2288 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002289 return 0;
2290}
Guido van Rossume5372401993-03-16 12:15:04 +00002291
2292/* Helpers for formatstring */
2293
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002294static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002295getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002296 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002297 int arglen;
2298 int *p_argidx;
2299{
2300 int argidx = *p_argidx;
2301 if (argidx < arglen) {
2302 (*p_argidx)++;
2303 if (arglen < 0)
2304 return args;
2305 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002306 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002307 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002308 PyErr_SetString(PyExc_TypeError,
2309 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002310 return NULL;
2311}
2312
/* Bit flags OR-ed together while parsing a format specifier; the comment
   on each line names the flag character that sets it. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
2318
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002319static int
2320formatfloat(buf, flags, prec, type, v)
2321 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002322 int flags;
2323 int prec;
2324 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002325 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002326{
2327 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002328 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002329 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002330 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002331 if (prec < 0)
2332 prec = 6;
2333 if (prec > 50)
2334 prec = 50; /* Arbitrary limitation */
2335 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2336 type = 'g';
2337 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2338 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002339 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002340}
2341
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002342static int
2343formatint(buf, flags, prec, type, v)
2344 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002345 int flags;
2346 int prec;
2347 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002348 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002349{
2350 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002351 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002352 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002353 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002354 if (prec < 0)
2355 prec = 1;
2356 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2357 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002358 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002359}
2360
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002361static int
2362formatchar(buf, v)
2363 char *buf;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002364 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002365{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002366 if (PyString_Check(v)) {
2367 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002368 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002369 }
2370 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002371 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002372 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002373 }
2374 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002375 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002376}
2377
Guido van Rossum013142a1994-08-30 08:19:36 +00002378
Guido van Rossume5372401993-03-16 12:15:04 +00002379/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
2380
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002381PyObject *
2382PyString_Format(format, args)
2383 PyObject *format;
2384 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002385{
2386 char *fmt, *res;
2387 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002388 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002389 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002390 PyObject *dict = NULL;
2391 if (format == NULL || !PyString_Check(format) || args == NULL) {
2392 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002393 return NULL;
2394 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002395 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002396 fmt = PyString_AsString(format);
2397 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002398 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002399 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002400 if (result == NULL)
2401 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002402 res = PyString_AsString(result);
2403 if (PyTuple_Check(args)) {
2404 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002405 argidx = 0;
2406 }
2407 else {
2408 arglen = -1;
2409 argidx = -2;
2410 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002411 if (args->ob_type->tp_as_mapping)
2412 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002413 while (--fmtcnt >= 0) {
2414 if (*fmt != '%') {
2415 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002416 rescnt = fmtcnt + 100;
2417 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002418 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002419 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002420 res = PyString_AsString(result)
2421 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002422 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002423 }
2424 *res++ = *fmt++;
2425 }
2426 else {
2427 /* Got a format specifier */
2428 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002429 int width = -1;
2430 int prec = -1;
2431 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002432 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002433 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002434 PyObject *v = NULL;
2435 PyObject *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +00002436 char *buf;
2437 int sign;
2438 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002439 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002440 char *fmt_start = fmt;
2441
Guido van Rossumda9c2711996-12-05 21:58:58 +00002442 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002443 if (*fmt == '(') {
2444 char *keystart;
2445 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002446 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002447 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002448
2449 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002450 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002451 "format requires a mapping");
2452 goto error;
2453 }
2454 ++fmt;
2455 --fmtcnt;
2456 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002457 /* Skip over balanced parentheses */
2458 while (pcount > 0 && --fmtcnt >= 0) {
2459 if (*fmt == ')')
2460 --pcount;
2461 else if (*fmt == '(')
2462 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002463 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002464 }
2465 keylen = fmt - keystart - 1;
2466 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002467 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002468 "incomplete format key");
2469 goto error;
2470 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002471 key = PyString_FromStringAndSize(keystart,
2472 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002473 if (key == NULL)
2474 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002475 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002476 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002477 args_owned = 0;
2478 }
2479 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002480 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002481 if (args == NULL) {
2482 goto error;
2483 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002484 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002485 arglen = -1;
2486 argidx = -2;
2487 }
Guido van Rossume5372401993-03-16 12:15:04 +00002488 while (--fmtcnt >= 0) {
2489 switch (c = *fmt++) {
2490 case '-': flags |= F_LJUST; continue;
2491 case '+': flags |= F_SIGN; continue;
2492 case ' ': flags |= F_BLANK; continue;
2493 case '#': flags |= F_ALT; continue;
2494 case '0': flags |= F_ZERO; continue;
2495 }
2496 break;
2497 }
2498 if (c == '*') {
2499 v = getnextarg(args, arglen, &argidx);
2500 if (v == NULL)
2501 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002502 if (!PyInt_Check(v)) {
2503 PyErr_SetString(PyExc_TypeError,
2504 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002505 goto error;
2506 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002507 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002508 if (width < 0) {
2509 flags |= F_LJUST;
2510 width = -width;
2511 }
Guido van Rossume5372401993-03-16 12:15:04 +00002512 if (--fmtcnt >= 0)
2513 c = *fmt++;
2514 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002515 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002516 width = c - '0';
2517 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002518 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002519 if (!isdigit(c))
2520 break;
2521 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002522 PyErr_SetString(
2523 PyExc_ValueError,
2524 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002525 goto error;
2526 }
2527 width = width*10 + (c - '0');
2528 }
2529 }
2530 if (c == '.') {
2531 prec = 0;
2532 if (--fmtcnt >= 0)
2533 c = *fmt++;
2534 if (c == '*') {
2535 v = getnextarg(args, arglen, &argidx);
2536 if (v == NULL)
2537 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002538 if (!PyInt_Check(v)) {
2539 PyErr_SetString(
2540 PyExc_TypeError,
2541 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002542 goto error;
2543 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002544 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002545 if (prec < 0)
2546 prec = 0;
2547 if (--fmtcnt >= 0)
2548 c = *fmt++;
2549 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002550 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002551 prec = c - '0';
2552 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002553 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002554 if (!isdigit(c))
2555 break;
2556 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002557 PyErr_SetString(
2558 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002559 "prec too big");
2560 goto error;
2561 }
2562 prec = prec*10 + (c - '0');
2563 }
2564 }
2565 } /* prec */
2566 if (fmtcnt >= 0) {
2567 if (c == 'h' || c == 'l' || c == 'L') {
2568 size = c;
2569 if (--fmtcnt >= 0)
2570 c = *fmt++;
2571 }
2572 }
2573 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002574 PyErr_SetString(PyExc_ValueError,
2575 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002576 goto error;
2577 }
2578 if (c != '%') {
2579 v = getnextarg(args, arglen, &argidx);
2580 if (v == NULL)
2581 goto error;
2582 }
2583 sign = 0;
2584 fill = ' ';
2585 switch (c) {
2586 case '%':
2587 buf = "%";
2588 len = 1;
2589 break;
2590 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002591 case 'r':
2592 if (PyUnicode_Check(v)) {
2593 fmt = fmt_start;
2594 goto unicode;
2595 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002596 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002597 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002598 else
2599 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002600 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002601 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002602 if (!PyString_Check(temp)) {
2603 PyErr_SetString(PyExc_TypeError,
2604 "%s argument has non-string str()");
2605 goto error;
2606 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002607 buf = PyString_AsString(temp);
2608 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002609 if (prec >= 0 && len > prec)
2610 len = prec;
2611 break;
2612 case 'i':
2613 case 'd':
2614 case 'u':
2615 case 'o':
2616 case 'x':
2617 case 'X':
2618 if (c == 'i')
2619 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002620 buf = tmpbuf;
2621 len = formatint(buf, flags, prec, c, v);
2622 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002623 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002624 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002625 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002626 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002627 if ((flags&F_ALT) &&
2628 (c == 'x' || c == 'X') &&
2629 buf[0] == '0' && buf[1] == c) {
2630 *res++ = *buf++;
2631 *res++ = *buf++;
2632 rescnt -= 2;
2633 len -= 2;
2634 width -= 2;
2635 if (width < 0)
2636 width = 0;
2637 }
2638 }
Guido van Rossume5372401993-03-16 12:15:04 +00002639 break;
2640 case 'e':
2641 case 'E':
2642 case 'f':
2643 case 'g':
2644 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002645 buf = tmpbuf;
2646 len = formatfloat(buf, flags, prec, c, v);
2647 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002648 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002649 sign = 1;
2650 if (flags&F_ZERO)
2651 fill = '0';
2652 break;
2653 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002654 buf = tmpbuf;
2655 len = formatchar(buf, v);
2656 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002657 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002658 break;
2659 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002660 PyErr_Format(PyExc_ValueError,
2661 "unsupported format character '%c' (0x%x)",
2662 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002663 goto error;
2664 }
2665 if (sign) {
2666 if (*buf == '-' || *buf == '+') {
2667 sign = *buf++;
2668 len--;
2669 }
2670 else if (flags & F_SIGN)
2671 sign = '+';
2672 else if (flags & F_BLANK)
2673 sign = ' ';
2674 else
2675 sign = '\0';
2676 }
2677 if (width < len)
2678 width = len;
2679 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002680 reslen -= rescnt;
2681 rescnt = width + fmtcnt + 100;
2682 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002683 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002684 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002685 res = PyString_AsString(result)
2686 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002687 }
2688 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002689 if (fill != ' ')
2690 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002691 rescnt--;
2692 if (width > len)
2693 width--;
2694 }
2695 if (width > len && !(flags&F_LJUST)) {
2696 do {
2697 --rescnt;
2698 *res++ = fill;
2699 } while (--width > len);
2700 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002701 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002702 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002703 memcpy(res, buf, len);
2704 res += len;
2705 rescnt -= len;
2706 while (--width >= len) {
2707 --rescnt;
2708 *res++ = ' ';
2709 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002710 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002711 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002712 "not all arguments converted");
2713 goto error;
2714 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002715 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002716 } /* '%' */
2717 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002718 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002719 PyErr_SetString(PyExc_TypeError,
2720 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002721 goto error;
2722 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002723 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002724 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002725 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002726 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002727 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002728
2729 unicode:
2730 if (args_owned) {
2731 Py_DECREF(args);
2732 args_owned = 0;
2733 }
2734 /* Fiddle args right (remove the first argidx-1 arguments) */
2735 --argidx;
2736 if (PyTuple_Check(orig_args) && argidx > 0) {
2737 PyObject *v;
2738 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2739 v = PyTuple_New(n);
2740 if (v == NULL)
2741 goto error;
2742 while (--n >= 0) {
2743 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2744 Py_INCREF(w);
2745 PyTuple_SET_ITEM(v, n, w);
2746 }
2747 args = v;
2748 } else {
2749 Py_INCREF(orig_args);
2750 args = orig_args;
2751 }
2752 /* Paste rest of format string to what we have of the result
2753 string; we reuse result for this */
2754 rescnt = res - PyString_AS_STRING(result);
2755 fmtcnt = PyString_GET_SIZE(format) - \
2756 (fmt - PyString_AS_STRING(format));
2757 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2758 Py_DECREF(args);
2759 goto error;
2760 }
2761 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2762 format = result;
2763 /* Let Unicode do its magic */
2764 result = PyUnicode_Format(format, args);
2765 Py_DECREF(format);
2766 Py_DECREF(args);
2767 return result;
2768
Guido van Rossume5372401993-03-16 12:15:04 +00002769 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002770 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002771 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002772 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002773 }
Guido van Rossume5372401993-03-16 12:15:04 +00002774 return NULL;
2775}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002776
2777
2778#ifdef INTERN_STRINGS
2779
2780static PyObject *interned;
2781
2782void
2783PyString_InternInPlace(p)
2784 PyObject **p;
2785{
2786 register PyStringObject *s = (PyStringObject *)(*p);
2787 PyObject *t;
2788 if (s == NULL || !PyString_Check(s))
2789 Py_FatalError("PyString_InternInPlace: strings only please!");
2790 if ((t = s->ob_sinterned) != NULL) {
2791 if (t == (PyObject *)s)
2792 return;
2793 Py_INCREF(t);
2794 *p = t;
2795 Py_DECREF(s);
2796 return;
2797 }
2798 if (interned == NULL) {
2799 interned = PyDict_New();
2800 if (interned == NULL)
2801 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002802 }
2803 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2804 Py_INCREF(t);
2805 *p = s->ob_sinterned = t;
2806 Py_DECREF(s);
2807 return;
2808 }
2809 t = (PyObject *)s;
2810 if (PyDict_SetItem(interned, t, t) == 0) {
2811 s->ob_sinterned = t;
2812 return;
2813 }
2814 PyErr_Clear();
2815}
2816
2817
2818PyObject *
2819PyString_InternFromString(cp)
2820 const char *cp;
2821{
2822 PyObject *s = PyString_FromString(cp);
2823 if (s == NULL)
2824 return NULL;
2825 PyString_InternInPlace(&s);
2826 return s;
2827}
2828
2829#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002830
2831void
2832PyString_Fini()
2833{
2834 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002835 for (i = 0; i < UCHAR_MAX + 1; i++) {
2836 Py_XDECREF(characters[i]);
2837 characters[i] = NULL;
2838 }
2839#ifndef DONT_SHARE_SHORT_STRINGS
2840 Py_XDECREF(nullstring);
2841 nullstring = NULL;
2842#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002843#ifdef INTERN_STRINGS
2844 if (interned) {
2845 int pos, changed;
2846 PyObject *key, *value;
2847 do {
2848 changed = 0;
2849 pos = 0;
2850 while (PyDict_Next(interned, &pos, &key, &value)) {
2851 if (key->ob_refcnt == 2 && key == value) {
2852 PyDict_DelItem(interned, key);
2853 changed = 1;
2854 }
2855 }
2856 } while (changed);
2857 }
2858#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002859}