blob: 10b43e4d999ff430eb2579f934b83c093eb21df4 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum71160aa1997-06-03 18:03:18 +000036#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000037#include <ctype.h>
38
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many requests were answered with the shared
   empty string (null_strings) and with a cached one-character string
   (one_strings) instead of a fresh allocation. */
int null_strings, one_strings;
#endif
42
Guido van Rossum03093a21994-09-28 15:51:32 +000043#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000044#include <limits.h>
45#else
46#ifndef UCHAR_MAX
47#define UCHAR_MAX 255
48#endif
49#endif
50
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000052#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071PyObject *
72PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000073 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074 int size;
75{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000077#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
92 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000095
96 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000098 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000099 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000101 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102#ifdef CACHE_HASH
103 op->ob_shash = -1;
104#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000105#ifdef INTERN_STRINGS
106 op->ob_sinterned = NULL;
107#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000108 if (str != NULL)
109 memcpy(op->ob_sval, str, size);
110 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0) {
113 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 } else if (size == 1 && str != NULL) {
116 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121}
122
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123PyObject *
124PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000125 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126{
127 register unsigned int size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000129#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 if (size == 0 && (op = nullstring) != NULL) {
131#ifdef COUNT_ALLOCS
132 null_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138#ifdef COUNT_ALLOCS
139 one_strings++;
140#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000144#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145
146 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000148 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000149 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000151 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152#ifdef CACHE_HASH
153 op->ob_shash = -1;
154#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000155#ifdef INTERN_STRINGS
156 op->ob_sinterned = NULL;
157#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000158 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000159#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 if (size == 0) {
161 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000162 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000163 } else if (size == 1) {
164 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000165 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000166 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000167#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000168 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169}
170
Guido van Rossum234f9421993-06-17 12:35:49 +0000171static void
Guido van Rossume5372401993-03-16 12:15:04 +0000172string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000173 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000174{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000175 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000176}
177
Guido van Rossumd7047b31995-01-02 19:07:15 +0000178int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000179PyString_Size(op)
180 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000182 if (!PyString_Check(op)) {
183 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184 return -1;
185 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000186 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000187}
188
189/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000190PyString_AsString(op)
191 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000193 if (!PyString_Check(op)) {
194 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 return NULL;
196 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000197 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198}
199
200/* Methods */
201
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000202static int
Guido van Rossume5372401993-03-16 12:15:04 +0000203string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000204 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 FILE *fp;
206 int flags;
207{
208 int i;
209 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000210 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000212 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000214 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216
217 /* figure out which quote to use; single is prefered */
218 quote = '\'';
219 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
220 quote = '"';
221
222 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 for (i = 0; i < op->ob_size; i++) {
224 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 fprintf(fp, "\\%c", c);
227 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000228 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000232 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000233 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234}
235
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000236static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000237string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239{
240 /* XXX overflow? */
241 int newsize = 2 + 4 * op->ob_size * sizeof(char);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000242 PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000244 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000245 }
246 else {
247 register int i;
248 register char c;
249 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000250 int quote;
251
252 /* figure out which quote to use; single is prefered */
253 quote = '\'';
254 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
255 quote = '"';
256
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000257 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 for (i = 0; i < op->ob_size; i++) {
260 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 *p++ = '\\', *p++ = c;
263 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000264 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 while (*p != '\0')
266 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 }
268 else
269 *p++ = c;
270 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000271 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000273 _PyString_Resize(
274 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000275 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277}
278
279static int
Guido van Rossume5372401993-03-16 12:15:04 +0000280string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282{
283 return a->ob_size;
284}
285
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000286static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000287string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000288 register PyStringObject *a;
289 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290{
291 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 register PyStringObject *op;
293 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000294 if (PyUnicode_Check(bb))
295 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000296 PyErr_Format(PyExc_TypeError,
297 "cannot add type \"%.200s\" to string",
298 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000299 return NULL;
300 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000301#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000302 /* Optimize cases with empty left or right operand */
303 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000304 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000305 return bb;
306 }
307 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000308 Py_INCREF(a);
309 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000310 }
311 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000312 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000313 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000314 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000315 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000316 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000317 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000318#ifdef CACHE_HASH
319 op->ob_shash = -1;
320#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000321#ifdef INTERN_STRINGS
322 op->ob_sinterned = NULL;
323#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000324 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
325 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
326 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000327 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000328#undef b
329}
330
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000331static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000332string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000333 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 register int n;
335{
336 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000337 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000338 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339 if (n < 0)
340 n = 0;
341 size = a->ob_size * n;
342 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000343 Py_INCREF(a);
344 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000345 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000346 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000348 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000349 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000350 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000351 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000352#ifdef CACHE_HASH
353 op->ob_shash = -1;
354#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000355#ifdef INTERN_STRINGS
356 op->ob_sinterned = NULL;
357#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000358 for (i = 0; i < size; i += a->ob_size)
359 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
360 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000361 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362}
363
364/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
365
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000367string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000368 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000369 register int i, j; /* May be negative! */
370{
371 if (i < 0)
372 i = 0;
373 if (j < 0)
374 j = 0; /* Avoid signed/unsigned bug in next line */
375 if (j > a->ob_size)
376 j = a->ob_size;
377 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000378 Py_INCREF(a);
379 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380 }
381 if (j < i)
382 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000383 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000384}
385
Guido van Rossum9284a572000-03-07 15:53:43 +0000386static int
387string_contains(a, el)
388PyObject *a, *el;
389{
390 register char *s, *end;
391 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000392 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000393 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000394 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000395 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000396 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000397 return -1;
398 }
399 c = PyString_AsString(el)[0];
400 s = PyString_AsString(a);
401 end = s + PyString_Size(a);
402 while (s < end) {
403 if (c == *s++)
404 return 1;
405 }
406 return 0;
407}
408
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000410string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000411 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 register int i;
413{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000414 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000418 return NULL;
419 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000420 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000422#ifdef COUNT_ALLOCS
423 if (v != NULL)
424 one_strings++;
425#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000426 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000428 if (v == NULL)
429 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430 characters[c] = (PyStringObject *) v;
431 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000432 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000434 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435}
436
437static int
Guido van Rossume5372401993-03-16 12:15:04 +0000438string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440{
Guido van Rossum253919f1991-02-13 23:18:39 +0000441 int len_a = a->ob_size, len_b = b->ob_size;
442 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000443 int cmp;
444 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000445 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000446 if (cmp == 0)
447 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
448 if (cmp != 0)
449 return cmp;
450 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000451 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000452}
453
Guido van Rossum9bfef441993-03-29 10:43:31 +0000454static long
455string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000456 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000457{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000458 register int len;
459 register unsigned char *p;
460 register long x;
461
462#ifdef CACHE_HASH
463 if (a->ob_shash != -1)
464 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000465#ifdef INTERN_STRINGS
466 if (a->ob_sinterned != NULL)
467 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000468 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000469#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000470#endif
471 len = a->ob_size;
472 p = (unsigned char *) a->ob_sval;
473 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000474 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000475 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000476 x ^= a->ob_size;
477 if (x == -1)
478 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000479#ifdef CACHE_HASH
480 a->ob_shash = x;
481#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000482 return x;
483}
484
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000485static int
486string_buffer_getreadbuf(self, index, ptr)
487 PyStringObject *self;
488 int index;
489 const void **ptr;
490{
491 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000492 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000493 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000494 return -1;
495 }
496 *ptr = (void *)self->ob_sval;
497 return self->ob_size;
498}
499
500static int
501string_buffer_getwritebuf(self, index, ptr)
502 PyStringObject *self;
503 int index;
504 const void **ptr;
505{
Guido van Rossum045e6881997-09-08 18:30:11 +0000506 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000507 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000508 return -1;
509}
510
511static int
512string_buffer_getsegcount(self, lenp)
513 PyStringObject *self;
514 int *lenp;
515{
516 if ( lenp )
517 *lenp = self->ob_size;
518 return 1;
519}
520
Guido van Rossum1db70701998-10-08 02:18:52 +0000521static int
522string_buffer_getcharbuf(self, index, ptr)
523 PyStringObject *self;
524 int index;
525 const char **ptr;
526{
527 if ( index != 0 ) {
528 PyErr_SetString(PyExc_SystemError,
529 "accessing non-existent string segment");
530 return -1;
531 }
532 *ptr = self->ob_sval;
533 return self->ob_size;
534}
535
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000536static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000537 (inquiry)string_length, /*sq_length*/
538 (binaryfunc)string_concat, /*sq_concat*/
539 (intargfunc)string_repeat, /*sq_repeat*/
540 (intargfunc)string_item, /*sq_item*/
541 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000542 0, /*sq_ass_item*/
543 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000544 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000545};
546
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000547static PyBufferProcs string_as_buffer = {
548 (getreadbufferproc)string_buffer_getreadbuf,
549 (getwritebufferproc)string_buffer_getwritebuf,
550 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000551 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000552};
553
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000554
555
/* Strip-mode selectors.
   NOTE(review): not referenced in the part of the file visible here;
   presumably used by the strip helpers further down -- confirm. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
559
560
561static PyObject *
562split_whitespace(s, len, maxsplit)
563 char *s;
564 int len;
565 int maxsplit;
566{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000567 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000568 PyObject* item;
569 PyObject *list = PyList_New(0);
570
571 if (list == NULL)
572 return NULL;
573
Guido van Rossum4c08d552000-03-10 22:55:18 +0000574 for (i = j = 0; i < len; ) {
575 while (i < len && isspace(Py_CHARMASK(s[i])))
576 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000577 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000578 while (i < len && !isspace(Py_CHARMASK(s[i])))
579 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000580 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000581 if (maxsplit-- <= 0)
582 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000583 item = PyString_FromStringAndSize(s+j, (int)(i-j));
584 if (item == NULL)
585 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000586 err = PyList_Append(list, item);
587 Py_DECREF(item);
588 if (err < 0)
589 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000590 while (i < len && isspace(Py_CHARMASK(s[i])))
591 i++;
592 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000593 }
594 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000595 if (j < len) {
596 item = PyString_FromStringAndSize(s+j, (int)(len - j));
597 if (item == NULL)
598 goto finally;
599 err = PyList_Append(list, item);
600 Py_DECREF(item);
601 if (err < 0)
602 goto finally;
603 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000604 return list;
605 finally:
606 Py_DECREF(list);
607 return NULL;
608}
609
610
/* Documentation string for the split() string method. */
static char split__doc__[] =
"S.split([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string. If maxsplit is given, at most maxsplit\n\
splits are done. If sep is not specified, any whitespace string\n\
is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000618
619static PyObject *
620string_split(self, args)
621 PyStringObject *self;
622 PyObject *args;
623{
624 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 int maxsplit = -1;
626 const char *s = PyString_AS_STRING(self), *sub;
627 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000628
Guido van Rossum4c08d552000-03-10 22:55:18 +0000629 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000631 if (maxsplit < 0)
632 maxsplit = INT_MAX;
633 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000634 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000635 if (PyString_Check(subobj)) {
636 sub = PyString_AS_STRING(subobj);
637 n = PyString_GET_SIZE(subobj);
638 }
639 else if (PyUnicode_Check(subobj))
640 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
641 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
642 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000643 if (n == 0) {
644 PyErr_SetString(PyExc_ValueError, "empty separator");
645 return NULL;
646 }
647
648 list = PyList_New(0);
649 if (list == NULL)
650 return NULL;
651
652 i = j = 0;
653 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000654 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000655 if (maxsplit-- <= 0)
656 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000657 item = PyString_FromStringAndSize(s+j, (int)(i-j));
658 if (item == NULL)
659 goto fail;
660 err = PyList_Append(list, item);
661 Py_DECREF(item);
662 if (err < 0)
663 goto fail;
664 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665 }
666 else
667 i++;
668 }
669 item = PyString_FromStringAndSize(s+j, (int)(len-j));
670 if (item == NULL)
671 goto fail;
672 err = PyList_Append(list, item);
673 Py_DECREF(item);
674 if (err < 0)
675 goto fail;
676
677 return list;
678
679 fail:
680 Py_DECREF(list);
681 return NULL;
682}
683
684
/* Documentation string for the join() string method. */
static char join__doc__[] =
"S.join(sequence) -> string\n\
\n\
Return a string which is the concatenation of the strings in the\n\
sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690
691static PyObject *
692string_join(self, args)
693 PyStringObject *self;
694 PyObject *args;
695{
696 char *sep = PyString_AS_STRING(self);
697 int seplen = PyString_GET_SIZE(self);
698 PyObject *res = NULL;
699 int reslen = 0;
700 char *p;
701 int seqlen = 0;
702 int sz = 100;
703 int i, slen;
704 PyObject *seq;
705
Guido van Rossum43713e52000-02-29 13:59:29 +0000706 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000707 return NULL;
708
709 seqlen = PySequence_Length(seq);
710 if (seqlen < 0 && PyErr_Occurred())
711 return NULL;
712
713 if (seqlen == 1) {
714 /* Optimization if there's only one item */
715 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000716 if (item == NULL)
717 return NULL;
718 if (!PyString_Check(item) &&
719 !PyUnicode_Check(item)) {
720 PyErr_SetString(PyExc_TypeError,
721 "first argument must be sequence of strings");
722 Py_DECREF(item);
723 return NULL;
724 }
725 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000726 }
727 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
728 return NULL;
729 p = PyString_AsString(res);
730
731 /* optimize for lists. all others (tuples and arbitrary sequences)
732 * just use the abstract interface.
733 */
734 if (PyList_Check(seq)) {
735 for (i = 0; i < seqlen; i++) {
736 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (!PyString_Check(item)){
738 if (PyUnicode_Check(item)) {
739 Py_DECREF(res);
740 return PyUnicode_Join(
741 (PyObject *)self,
742 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000743 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000744 PyErr_Format(PyExc_TypeError,
745 "sequence item %i not a string",
746 i);
747 goto finally;
748 }
749 slen = PyString_GET_SIZE(item);
750 while (reslen + slen + seplen >= sz) {
751 if (_PyString_Resize(&res, sz*2))
752 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000753 sz *= 2;
754 p = PyString_AsString(res) + reslen;
755 }
756 if (i > 0) {
757 memcpy(p, sep, seplen);
758 p += seplen;
759 reslen += seplen;
760 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000761 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 p += slen;
763 reslen += slen;
764 }
765 }
766 else {
767 for (i = 0; i < seqlen; i++) {
768 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000769 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000770 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000771 if (!PyString_Check(item)){
772 if (PyUnicode_Check(item)) {
773 Py_DECREF(res);
774 Py_DECREF(item);
775 return PyUnicode_Join(
776 (PyObject *)self,
777 seq);
778 }
779 Py_DECREF(item);
780 PyErr_Format(PyExc_TypeError,
781 "sequence item %i not a string",
782 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000783 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000784 }
785 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000786 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000787 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000788 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000790 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000791 sz *= 2;
792 p = PyString_AsString(res) + reslen;
793 }
794 if (i > 0) {
795 memcpy(p, sep, seplen);
796 p += seplen;
797 reslen += seplen;
798 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000799 memcpy(p, PyString_AS_STRING(item), slen);
800 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000801 p += slen;
802 reslen += slen;
803 }
804 }
805 if (_PyString_Resize(&res, reslen))
806 goto finally;
807 return res;
808
809 finally:
810 Py_DECREF(res);
811 return NULL;
812}
813
814
815
816static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000818 PyStringObject *self;
819 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000820 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000822 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000823 int len = PyString_GET_SIZE(self);
824 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000825 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000826
Guido van Rossumc6821402000-05-08 14:08:05 +0000827 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
828 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000829 return -2;
830 if (PyString_Check(subobj)) {
831 sub = PyString_AS_STRING(subobj);
832 n = PyString_GET_SIZE(subobj);
833 }
834 else if (PyUnicode_Check(subobj))
835 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
836 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000837 return -2;
838
839 if (last > len)
840 last = len;
841 if (last < 0)
842 last += len;
843 if (last < 0)
844 last = 0;
845 if (i < 0)
846 i += len;
847 if (i < 0)
848 i = 0;
849
Guido van Rossum4c08d552000-03-10 22:55:18 +0000850 if (dir > 0) {
851 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000852 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000853 last -= n;
854 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000855 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000856 return (long)i;
857 }
858 else {
859 int j;
860
861 if (n == 0 && i <= last)
862 return (long)last;
863 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000864 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000865 return (long)j;
866 }
867
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868 return -1;
869}
870
871
872static char find__doc__[] =
873"S.find(sub [,start [,end]]) -> int\n\
874\n\
875Return the lowest index in S where substring sub is found,\n\
876such that sub is contained within s[start,end]. Optional\n\
877arguments start and end are interpreted as in slice notation.\n\
878\n\
879Return -1 on failure.";
880
881static PyObject *
882string_find(self, args)
883 PyStringObject *self;
884 PyObject *args;
885{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000886 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000887 if (result == -2)
888 return NULL;
889 return PyInt_FromLong(result);
890}
891
892
893static char index__doc__[] =
894"S.index(sub [,start [,end]]) -> int\n\
895\n\
896Like S.find() but raise ValueError when the substring is not found.";
897
898static PyObject *
899string_index(self, args)
900 PyStringObject *self;
901 PyObject *args;
902{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000903 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000904 if (result == -2)
905 return NULL;
906 if (result == -1) {
907 PyErr_SetString(PyExc_ValueError,
908 "substring not found in string.index");
909 return NULL;
910 }
911 return PyInt_FromLong(result);
912}
913
914
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000915static char rfind__doc__[] =
916"S.rfind(sub [,start [,end]]) -> int\n\
917\n\
918Return the highest index in S where substring sub is found,\n\
919such that sub is contained within s[start,end]. Optional\n\
920arguments start and end are interpreted as in slice notation.\n\
921\n\
922Return -1 on failure.";
923
924static PyObject *
925string_rfind(self, args)
926 PyStringObject *self;
927 PyObject *args;
928{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000929 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000930 if (result == -2)
931 return NULL;
932 return PyInt_FromLong(result);
933}
934
935
936static char rindex__doc__[] =
937"S.rindex(sub [,start [,end]]) -> int\n\
938\n\
939Like S.rfind() but raise ValueError when the substring is not found.";
940
941static PyObject *
942string_rindex(self, args)
943 PyStringObject *self;
944 PyObject *args;
945{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000946 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000947 if (result == -2)
948 return NULL;
949 if (result == -1) {
950 PyErr_SetString(PyExc_ValueError,
951 "substring not found in string.rindex");
952 return NULL;
953 }
954 return PyInt_FromLong(result);
955}
956
957
958static PyObject *
959do_strip(self, args, striptype)
960 PyStringObject *self;
961 PyObject *args;
962 int striptype;
963{
964 char *s = PyString_AS_STRING(self);
965 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000966
Guido van Rossum43713e52000-02-29 13:59:29 +0000967 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000968 return NULL;
969
970 i = 0;
971 if (striptype != RIGHTSTRIP) {
972 while (i < len && isspace(Py_CHARMASK(s[i]))) {
973 i++;
974 }
975 }
976
977 j = len;
978 if (striptype != LEFTSTRIP) {
979 do {
980 j--;
981 } while (j >= i && isspace(Py_CHARMASK(s[j])));
982 j++;
983 }
984
985 if (i == 0 && j == len) {
986 Py_INCREF(self);
987 return (PyObject*)self;
988 }
989 else
990 return PyString_FromStringAndSize(s+i, j-i);
991}
992
993
994static char strip__doc__[] =
995"S.strip() -> string\n\
996\n\
997Return a copy of the string S with leading and trailing\n\
998whitespace removed.";
999
1000static PyObject *
1001string_strip(self, args)
1002 PyStringObject *self;
1003 PyObject *args;
1004{
1005 return do_strip(self, args, BOTHSTRIP);
1006}
1007
1008
1009static char lstrip__doc__[] =
1010"S.lstrip() -> string\n\
1011\n\
1012Return a copy of the string S with leading whitespace removed.";
1013
1014static PyObject *
1015string_lstrip(self, args)
1016 PyStringObject *self;
1017 PyObject *args;
1018{
1019 return do_strip(self, args, LEFTSTRIP);
1020}
1021
1022
1023static char rstrip__doc__[] =
1024"S.rstrip() -> string\n\
1025\n\
1026Return a copy of the string S with trailing whitespace removed.";
1027
1028static PyObject *
1029string_rstrip(self, args)
1030 PyStringObject *self;
1031 PyObject *args;
1032{
1033 return do_strip(self, args, RIGHTSTRIP);
1034}
1035
1036
1037static char lower__doc__[] =
1038"S.lower() -> string\n\
1039\n\
1040Return a copy of the string S converted to lowercase.";
1041
1042static PyObject *
1043string_lower(self, args)
1044 PyStringObject *self;
1045 PyObject *args;
1046{
1047 char *s = PyString_AS_STRING(self), *s_new;
1048 int i, n = PyString_GET_SIZE(self);
1049 PyObject *new;
1050
Guido van Rossum43713e52000-02-29 13:59:29 +00001051 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 return NULL;
1053 new = PyString_FromStringAndSize(NULL, n);
1054 if (new == NULL)
1055 return NULL;
1056 s_new = PyString_AsString(new);
1057 for (i = 0; i < n; i++) {
1058 int c = Py_CHARMASK(*s++);
1059 if (isupper(c)) {
1060 *s_new = tolower(c);
1061 } else
1062 *s_new = c;
1063 s_new++;
1064 }
1065 return new;
1066}
1067
1068
1069static char upper__doc__[] =
1070"S.upper() -> string\n\
1071\n\
1072Return a copy of the string S converted to uppercase.";
1073
1074static PyObject *
1075string_upper(self, args)
1076 PyStringObject *self;
1077 PyObject *args;
1078{
1079 char *s = PyString_AS_STRING(self), *s_new;
1080 int i, n = PyString_GET_SIZE(self);
1081 PyObject *new;
1082
Guido van Rossum43713e52000-02-29 13:59:29 +00001083 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001084 return NULL;
1085 new = PyString_FromStringAndSize(NULL, n);
1086 if (new == NULL)
1087 return NULL;
1088 s_new = PyString_AsString(new);
1089 for (i = 0; i < n; i++) {
1090 int c = Py_CHARMASK(*s++);
1091 if (islower(c)) {
1092 *s_new = toupper(c);
1093 } else
1094 *s_new = c;
1095 s_new++;
1096 }
1097 return new;
1098}
1099
1100
Guido van Rossum4c08d552000-03-10 22:55:18 +00001101static char title__doc__[] =
1102"S.title() -> string\n\
1103\n\
1104Return a titlecased version of S, i.e. words start with uppercase\n\
1105characters, all remaining cased characters have lowercase.";
1106
1107static PyObject*
1108string_title(PyUnicodeObject *self, PyObject *args)
1109{
1110 char *s = PyString_AS_STRING(self), *s_new;
1111 int i, n = PyString_GET_SIZE(self);
1112 int previous_is_cased = 0;
1113 PyObject *new;
1114
1115 if (!PyArg_ParseTuple(args, ":title"))
1116 return NULL;
1117 new = PyString_FromStringAndSize(NULL, n);
1118 if (new == NULL)
1119 return NULL;
1120 s_new = PyString_AsString(new);
1121 for (i = 0; i < n; i++) {
1122 int c = Py_CHARMASK(*s++);
1123 if (islower(c)) {
1124 if (!previous_is_cased)
1125 c = toupper(c);
1126 previous_is_cased = 1;
1127 } else if (isupper(c)) {
1128 if (previous_is_cased)
1129 c = tolower(c);
1130 previous_is_cased = 1;
1131 } else
1132 previous_is_cased = 0;
1133 *s_new++ = c;
1134 }
1135 return new;
1136}
1137
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138static char capitalize__doc__[] =
1139"S.capitalize() -> string\n\
1140\n\
1141Return a copy of the string S with only its first character\n\
1142capitalized.";
1143
1144static PyObject *
1145string_capitalize(self, args)
1146 PyStringObject *self;
1147 PyObject *args;
1148{
1149 char *s = PyString_AS_STRING(self), *s_new;
1150 int i, n = PyString_GET_SIZE(self);
1151 PyObject *new;
1152
Guido van Rossum43713e52000-02-29 13:59:29 +00001153 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154 return NULL;
1155 new = PyString_FromStringAndSize(NULL, n);
1156 if (new == NULL)
1157 return NULL;
1158 s_new = PyString_AsString(new);
1159 if (0 < n) {
1160 int c = Py_CHARMASK(*s++);
1161 if (islower(c))
1162 *s_new = toupper(c);
1163 else
1164 *s_new = c;
1165 s_new++;
1166 }
1167 for (i = 1; i < n; i++) {
1168 int c = Py_CHARMASK(*s++);
1169 if (isupper(c))
1170 *s_new = tolower(c);
1171 else
1172 *s_new = c;
1173 s_new++;
1174 }
1175 return new;
1176}
1177
1178
1179static char count__doc__[] =
1180"S.count(sub[, start[, end]]) -> int\n\
1181\n\
1182Return the number of occurrences of substring sub in string\n\
1183S[start:end]. Optional arguments start and end are\n\
1184interpreted as in slice notation.";
1185
1186static PyObject *
1187string_count(self, args)
1188 PyStringObject *self;
1189 PyObject *args;
1190{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001191 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001192 int len = PyString_GET_SIZE(self), n;
1193 int i = 0, last = INT_MAX;
1194 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001195 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001196
Guido van Rossumc6821402000-05-08 14:08:05 +00001197 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1198 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001199 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001200
Guido van Rossum4c08d552000-03-10 22:55:18 +00001201 if (PyString_Check(subobj)) {
1202 sub = PyString_AS_STRING(subobj);
1203 n = PyString_GET_SIZE(subobj);
1204 }
1205 else if (PyUnicode_Check(subobj))
1206 return PyInt_FromLong(
1207 PyUnicode_Count((PyObject *)self, subobj, i, last));
1208 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1209 return NULL;
1210
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001211 if (last > len)
1212 last = len;
1213 if (last < 0)
1214 last += len;
1215 if (last < 0)
1216 last = 0;
1217 if (i < 0)
1218 i += len;
1219 if (i < 0)
1220 i = 0;
1221 m = last + 1 - n;
1222 if (n == 0)
1223 return PyInt_FromLong((long) (m-i));
1224
1225 r = 0;
1226 while (i < m) {
1227 if (!memcmp(s+i, sub, n)) {
1228 r++;
1229 i += n;
1230 } else {
1231 i++;
1232 }
1233 }
1234 return PyInt_FromLong((long) r);
1235}
1236
1237
1238static char swapcase__doc__[] =
1239"S.swapcase() -> string\n\
1240\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001241Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242converted to lowercase and vice versa.";
1243
1244static PyObject *
1245string_swapcase(self, args)
1246 PyStringObject *self;
1247 PyObject *args;
1248{
1249 char *s = PyString_AS_STRING(self), *s_new;
1250 int i, n = PyString_GET_SIZE(self);
1251 PyObject *new;
1252
Guido van Rossum43713e52000-02-29 13:59:29 +00001253 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001254 return NULL;
1255 new = PyString_FromStringAndSize(NULL, n);
1256 if (new == NULL)
1257 return NULL;
1258 s_new = PyString_AsString(new);
1259 for (i = 0; i < n; i++) {
1260 int c = Py_CHARMASK(*s++);
1261 if (islower(c)) {
1262 *s_new = toupper(c);
1263 }
1264 else if (isupper(c)) {
1265 *s_new = tolower(c);
1266 }
1267 else
1268 *s_new = c;
1269 s_new++;
1270 }
1271 return new;
1272}
1273
1274
1275static char translate__doc__[] =
1276"S.translate(table [,deletechars]) -> string\n\
1277\n\
1278Return a copy of the string S, where all characters occurring\n\
1279in the optional argument deletechars are removed, and the\n\
1280remaining characters have been mapped through the given\n\
1281translation table, which must be a string of length 256.";
1282
1283static PyObject *
1284string_translate(self, args)
1285 PyStringObject *self;
1286 PyObject *args;
1287{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001288 register char *input, *output;
1289 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 register int i, c, changed = 0;
1291 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293 int inlen, tablen, dellen = 0;
1294 PyObject *result;
1295 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001297
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 if (!PyArg_ParseTuple(args, "O|O:translate",
1299 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301
1302 if (PyString_Check(tableobj)) {
1303 table1 = PyString_AS_STRING(tableobj);
1304 tablen = PyString_GET_SIZE(tableobj);
1305 }
1306 else if (PyUnicode_Check(tableobj)) {
1307 /* Unicode .translate() does not support the deletechars
1308 parameter; instead a mapping to None will cause characters
1309 to be deleted. */
1310 if (delobj != NULL) {
1311 PyErr_SetString(PyExc_TypeError,
1312 "deletions are implemented differently for unicode");
1313 return NULL;
1314 }
1315 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1316 }
1317 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319
1320 if (delobj != NULL) {
1321 if (PyString_Check(delobj)) {
1322 del_table = PyString_AS_STRING(delobj);
1323 dellen = PyString_GET_SIZE(delobj);
1324 }
1325 else if (PyUnicode_Check(delobj)) {
1326 PyErr_SetString(PyExc_TypeError,
1327 "deletions are implemented differently for unicode");
1328 return NULL;
1329 }
1330 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1331 return NULL;
1332
1333 if (tablen != 256) {
1334 PyErr_SetString(PyExc_ValueError,
1335 "translation table must be 256 characters long");
1336 return NULL;
1337 }
1338 }
1339 else {
1340 del_table = NULL;
1341 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342 }
1343
1344 table = table1;
1345 inlen = PyString_Size(input_obj);
1346 result = PyString_FromStringAndSize((char *)NULL, inlen);
1347 if (result == NULL)
1348 return NULL;
1349 output_start = output = PyString_AsString(result);
1350 input = PyString_AsString(input_obj);
1351
1352 if (dellen == 0) {
1353 /* If no deletions are required, use faster code */
1354 for (i = inlen; --i >= 0; ) {
1355 c = Py_CHARMASK(*input++);
1356 if (Py_CHARMASK((*output++ = table[c])) != c)
1357 changed = 1;
1358 }
1359 if (changed)
1360 return result;
1361 Py_DECREF(result);
1362 Py_INCREF(input_obj);
1363 return input_obj;
1364 }
1365
1366 for (i = 0; i < 256; i++)
1367 trans_table[i] = Py_CHARMASK(table[i]);
1368
1369 for (i = 0; i < dellen; i++)
1370 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1371
1372 for (i = inlen; --i >= 0; ) {
1373 c = Py_CHARMASK(*input++);
1374 if (trans_table[c] != -1)
1375 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1376 continue;
1377 changed = 1;
1378 }
1379 if (!changed) {
1380 Py_DECREF(result);
1381 Py_INCREF(input_obj);
1382 return input_obj;
1383 }
1384 /* Fix the size of the resulting string */
1385 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1386 return NULL;
1387 return result;
1388}
1389
1390
1391/* What follows is used for implementing replace(). Perry Stoll. */
1392
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM (LEN bytes)
  of the contents of memory pointed to by PAT (PAT_LEN bytes).  Returns
  the index into MEM if found, or -1 if not found.  If len of PAT is
  greater than length of MEM, the function returns -1.

  Like strstr, an empty pattern (PAT_LEN <= 0) is found at index 0 of any
  block whose LEN is not negative; neither MEM nor PAT is dereferenced in
  that case.  Callers that advance by PAT_LEN after each hit must reject
  empty patterns themselves or they will never make progress.

  Neither block is modified, hence the const qualifiers.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	register int ii;

	/* Nothing to compare: do not touch pat[0] or mem[0], which may lie
	   outside the blocks handed in. */
	if (pat_len <= 0)
		return (len >= 0) ? 0 : -1;

	/* pattern can not occur in the last pat_len-1 chars */
	len -= pat_len;

	for (ii = 0; ii <= len; ii++) {
		/* cheap first-byte test before the full comparison */
		if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
			return ii;
		}
	}
	return -1;
}
1422
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT (PAT_LEN bytes) in MEM
  (LEN bytes), scanning from left to right.  For example "1111" holds
  "11" twice, and so does "11111".
 */
static int
mymemcnt(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		int consumed;
		int at = mymemfind(mem, len, pat, pat_len);

		if (at == -1)
			break;
		/* continue the search right behind this occurrence */
		consumed = at + pat_len;
		mem += consumed;
		len -= consumed;
	}
	return hits;
}
1450
/*
   mymemreplace

   Return a string in which the first COUNT occurrences of PAT in memory
   STR are replaced with SUB.  A negative COUNT means "replace all".

   If STR is empty, PAT is longer than STR, or no occurrence of PAT is
   to be replaced, then the original string is returned.  Otherwise, a
   new string is allocated here with PyMem_MALLOC and returned; the caller
   owns it and must release it with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       STR itself when nothing was replaced, or
       NULL if an error occurred.
*/
static char *
mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
	char *str;
	int len;     /* input string */
	char *pat;
	int pat_len; /* pattern string to find */
	char *sub;
	int sub_len; /* substitution string */
	int count;   /* number of replacements */
	int *out_len;

{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;
	new_len = len + nfound*(sub_len - pat_len);

	/* The result is empty when every byte of STR belongs to a replaced
	   occurrence and SUB is empty.  A zero-byte allocation may yield
	   NULL, which the caller would take for "out of memory", so always
	   ask for at least one byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1533
1534
1535static char replace__doc__[] =
1536"S.replace (old, new[, maxsplit]) -> string\n\
1537\n\
1538Return a copy of string S with all occurrences of substring\n\
1539old replaced by new. If the optional argument maxsplit is\n\
1540given, only the first maxsplit occurrences are replaced.";
1541
1542static PyObject *
1543string_replace(self, args)
1544 PyStringObject *self;
1545 PyObject *args;
1546{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001547 const char *str = PyString_AS_STRING(self), *sub, *repl;
1548 char *new_s;
1549 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1550 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 if (!PyArg_ParseTuple(args, "OO|i:replace",
1555 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001556 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001557
1558 if (PyString_Check(subobj)) {
1559 sub = PyString_AS_STRING(subobj);
1560 sub_len = PyString_GET_SIZE(subobj);
1561 }
1562 else if (PyUnicode_Check(subobj))
1563 return PyUnicode_Replace((PyObject *)self,
1564 subobj, replobj, count);
1565 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1566 return NULL;
1567
1568 if (PyString_Check(replobj)) {
1569 repl = PyString_AS_STRING(replobj);
1570 repl_len = PyString_GET_SIZE(replobj);
1571 }
1572 else if (PyUnicode_Check(replobj))
1573 return PyUnicode_Replace((PyObject *)self,
1574 subobj, replobj, count);
1575 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1576 return NULL;
1577
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001578 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001579 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001580 return NULL;
1581 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001582 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583 if (new_s == NULL) {
1584 PyErr_NoMemory();
1585 return NULL;
1586 }
1587 if (out_len == -1) {
1588 /* we're returning another reference to self */
1589 new = (PyObject*)self;
1590 Py_INCREF(new);
1591 }
1592 else {
1593 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001594 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001595 }
1596 return new;
1597}
1598
1599
1600static char startswith__doc__[] =
1601"S.startswith(prefix[, start[, end]]) -> int\n\
1602\n\
1603Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1604optional start, test S beginning at that position. With optional end, stop\n\
1605comparing S at that position.";
1606
1607static PyObject *
1608string_startswith(self, args)
1609 PyStringObject *self;
1610 PyObject *args;
1611{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001612 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 int plen;
1616 int start = 0;
1617 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001618 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619
Guido van Rossumc6821402000-05-08 14:08:05 +00001620 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1621 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622 return NULL;
1623 if (PyString_Check(subobj)) {
1624 prefix = PyString_AS_STRING(subobj);
1625 plen = PyString_GET_SIZE(subobj);
1626 }
1627 else if (PyUnicode_Check(subobj))
1628 return PyInt_FromLong(
1629 PyUnicode_Tailmatch((PyObject *)self,
1630 subobj, start, end, -1));
1631 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 return NULL;
1633
1634 /* adopt Java semantics for index out of range. it is legal for
1635 * offset to be == plen, but this only returns true if prefix is
1636 * the empty string.
1637 */
1638 if (start < 0 || start+plen > len)
1639 return PyInt_FromLong(0);
1640
1641 if (!memcmp(str+start, prefix, plen)) {
1642 /* did the match end after the specified end? */
1643 if (end < 0)
1644 return PyInt_FromLong(1);
1645 else if (end - start < plen)
1646 return PyInt_FromLong(0);
1647 else
1648 return PyInt_FromLong(1);
1649 }
1650 else return PyInt_FromLong(0);
1651}
1652
1653
1654static char endswith__doc__[] =
1655"S.endswith(suffix[, start[, end]]) -> int\n\
1656\n\
1657Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1658optional start, test S beginning at that position. With optional end, stop\n\
1659comparing S at that position.";
1660
1661static PyObject *
1662string_endswith(self, args)
1663 PyStringObject *self;
1664 PyObject *args;
1665{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001666 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001667 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668 const char* suffix;
1669 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670 int start = 0;
1671 int end = -1;
1672 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674
Guido van Rossumc6821402000-05-08 14:08:05 +00001675 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1676 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 return NULL;
1678 if (PyString_Check(subobj)) {
1679 suffix = PyString_AS_STRING(subobj);
1680 slen = PyString_GET_SIZE(subobj);
1681 }
1682 else if (PyUnicode_Check(subobj))
1683 return PyInt_FromLong(
1684 PyUnicode_Tailmatch((PyObject *)self,
1685 subobj, start, end, +1));
1686 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687 return NULL;
1688
Guido van Rossum4c08d552000-03-10 22:55:18 +00001689 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690 return PyInt_FromLong(0);
1691
1692 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694
Guido van Rossum4c08d552000-03-10 22:55:18 +00001695 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696 return PyInt_FromLong(1);
1697 else return PyInt_FromLong(0);
1698}
1699
1700
Guido van Rossum4c08d552000-03-10 22:55:18 +00001701static char expandtabs__doc__[] =
1702"S.expandtabs([tabsize]) -> string\n\
1703\n\
1704Return a copy of S where all tab characters are expanded using spaces.\n\
1705If tabsize is not given, a tab size of 8 characters is assumed.";
1706
1707static PyObject*
1708string_expandtabs(PyStringObject *self, PyObject *args)
1709{
1710 const char *e, *p;
1711 char *q;
1712 int i, j;
1713 PyObject *u;
1714 int tabsize = 8;
1715
1716 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1717 return NULL;
1718
1719 /* First pass: determine size of ouput string */
1720 i = j = 0;
1721 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1722 for (p = PyString_AS_STRING(self); p < e; p++)
1723 if (*p == '\t') {
1724 if (tabsize > 0)
1725 j += tabsize - (j % tabsize);
1726 }
1727 else {
1728 j++;
1729 if (*p == '\n' || *p == '\r') {
1730 i += j;
1731 j = 0;
1732 }
1733 }
1734
1735 /* Second pass: create output string and fill it */
1736 u = PyString_FromStringAndSize(NULL, i + j);
1737 if (!u)
1738 return NULL;
1739
1740 j = 0;
1741 q = PyString_AS_STRING(u);
1742
1743 for (p = PyString_AS_STRING(self); p < e; p++)
1744 if (*p == '\t') {
1745 if (tabsize > 0) {
1746 i = tabsize - (j % tabsize);
1747 j += i;
1748 while (i--)
1749 *q++ = ' ';
1750 }
1751 }
1752 else {
1753 j++;
1754 *q++ = *p;
1755 if (*p == '\n' || *p == '\r')
1756 j = 0;
1757 }
1758
1759 return u;
1760}
1761
1762static
1763PyObject *pad(PyStringObject *self,
1764 int left,
1765 int right,
1766 char fill)
1767{
1768 PyObject *u;
1769
1770 if (left < 0)
1771 left = 0;
1772 if (right < 0)
1773 right = 0;
1774
1775 if (left == 0 && right == 0) {
1776 Py_INCREF(self);
1777 return (PyObject *)self;
1778 }
1779
1780 u = PyString_FromStringAndSize(NULL,
1781 left + PyString_GET_SIZE(self) + right);
1782 if (u) {
1783 if (left)
1784 memset(PyString_AS_STRING(u), fill, left);
1785 memcpy(PyString_AS_STRING(u) + left,
1786 PyString_AS_STRING(self),
1787 PyString_GET_SIZE(self));
1788 if (right)
1789 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1790 fill, right);
1791 }
1792
1793 return u;
1794}
1795
1796static char ljust__doc__[] =
1797"S.ljust(width) -> string\n\
1798\n\
1799Return S left justified in a string of length width. Padding is\n\
1800done using spaces.";
1801
1802static PyObject *
1803string_ljust(PyStringObject *self, PyObject *args)
1804{
1805 int width;
1806 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1807 return NULL;
1808
1809 if (PyString_GET_SIZE(self) >= width) {
1810 Py_INCREF(self);
1811 return (PyObject*) self;
1812 }
1813
1814 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1815}
1816
1817
1818static char rjust__doc__[] =
1819"S.rjust(width) -> string\n\
1820\n\
1821Return S right justified in a string of length width. Padding is\n\
1822done using spaces.";
1823
1824static PyObject *
1825string_rjust(PyStringObject *self, PyObject *args)
1826{
1827 int width;
1828 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1829 return NULL;
1830
1831 if (PyString_GET_SIZE(self) >= width) {
1832 Py_INCREF(self);
1833 return (PyObject*) self;
1834 }
1835
1836 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1837}
1838
1839
1840static char center__doc__[] =
1841"S.center(width) -> string\n\
1842\n\
1843Return S centered in a string of length width. Padding is done\n\
1844using spaces.";
1845
1846static PyObject *
1847string_center(PyStringObject *self, PyObject *args)
1848{
1849 int marg, left;
1850 int width;
1851
1852 if (!PyArg_ParseTuple(args, "i:center", &width))
1853 return NULL;
1854
1855 if (PyString_GET_SIZE(self) >= width) {
1856 Py_INCREF(self);
1857 return (PyObject*) self;
1858 }
1859
1860 marg = width - PyString_GET_SIZE(self);
1861 left = marg / 2 + (marg & width & 1);
1862
1863 return pad(self, left, marg - left, ' ');
1864}
1865
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad with '0' up to width characters, keeping a
   leading sign character in front of the zeros.  Currently compiled
   out. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	PyObject *padded;
	char *buf;
	int width;
	int npad;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject *)self;
	}

	npad = width - PyString_GET_SIZE(self);

	padded = pad(self, npad, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[npad] is the first character of the original string */
	buf = PyString_AS_STRING(padded);
	switch (buf[npad]) {
	case '+':
	case '-':
		/* move sign to beginning of string */
		buf[0] = buf[npad];
		buf[npad] = '0';
		break;
	default:
		break;
	}

	return padded;
}
#endif
1905
1906static char isspace__doc__[] =
1907"S.isspace() -> int\n\
1908\n\
1909Return 1 if there are only whitespace characters in S,\n\
19100 otherwise.";
1911
1912static PyObject*
1913string_isspace(PyStringObject *self, PyObject *args)
1914{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001915 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1916 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001917
1918 if (!PyArg_NoArgs(args))
1919 return NULL;
1920
1921 /* Shortcut for single character strings */
1922 if (PyString_GET_SIZE(self) == 1 &&
1923 isspace(*p))
1924 return PyInt_FromLong(1);
1925
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001926 /* Special case for empty strings */
1927 if (PyString_GET_SIZE(self) == 0)
1928 return PyInt_FromLong(0);
1929
Guido van Rossum4c08d552000-03-10 22:55:18 +00001930 e = p + PyString_GET_SIZE(self);
1931 for (; p < e; p++) {
1932 if (!isspace(*p))
1933 return PyInt_FromLong(0);
1934 }
1935 return PyInt_FromLong(1);
1936}
1937
1938
1939static char isdigit__doc__[] =
1940"S.isdigit() -> int\n\
1941\n\
1942Return 1 if there are only digit characters in S,\n\
19430 otherwise.";
1944
1945static PyObject*
1946string_isdigit(PyStringObject *self, PyObject *args)
1947{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001948 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1949 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001950
1951 if (!PyArg_NoArgs(args))
1952 return NULL;
1953
1954 /* Shortcut for single character strings */
1955 if (PyString_GET_SIZE(self) == 1 &&
1956 isdigit(*p))
1957 return PyInt_FromLong(1);
1958
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001959 /* Special case for empty strings */
1960 if (PyString_GET_SIZE(self) == 0)
1961 return PyInt_FromLong(0);
1962
Guido van Rossum4c08d552000-03-10 22:55:18 +00001963 e = p + PyString_GET_SIZE(self);
1964 for (; p < e; p++) {
1965 if (!isdigit(*p))
1966 return PyInt_FromLong(0);
1967 }
1968 return PyInt_FromLong(1);
1969}
1970
1971
1972static char islower__doc__[] =
1973"S.islower() -> int\n\
1974\n\
1975Return 1 if all cased characters in S are lowercase and there is\n\
1976at least one cased character in S, 0 otherwise.";
1977
1978static PyObject*
1979string_islower(PyStringObject *self, PyObject *args)
1980{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001981 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1982 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001983 int cased;
1984
1985 if (!PyArg_NoArgs(args))
1986 return NULL;
1987
1988 /* Shortcut for single character strings */
1989 if (PyString_GET_SIZE(self) == 1)
1990 return PyInt_FromLong(islower(*p) != 0);
1991
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001992 /* Special case for empty strings */
1993 if (PyString_GET_SIZE(self) == 0)
1994 return PyInt_FromLong(0);
1995
Guido van Rossum4c08d552000-03-10 22:55:18 +00001996 e = p + PyString_GET_SIZE(self);
1997 cased = 0;
1998 for (; p < e; p++) {
1999 if (isupper(*p))
2000 return PyInt_FromLong(0);
2001 else if (!cased && islower(*p))
2002 cased = 1;
2003 }
2004 return PyInt_FromLong(cased);
2005}
2006
2007
2008static char isupper__doc__[] =
2009"S.isupper() -> int\n\
2010\n\
2011Return 1 if all cased characters in S are uppercase and there is\n\
2012at least one cased character in S, 0 otherwise.";
2013
2014static PyObject*
2015string_isupper(PyStringObject *self, PyObject *args)
2016{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002017 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2018 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002019 int cased;
2020
2021 if (!PyArg_NoArgs(args))
2022 return NULL;
2023
2024 /* Shortcut for single character strings */
2025 if (PyString_GET_SIZE(self) == 1)
2026 return PyInt_FromLong(isupper(*p) != 0);
2027
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002028 /* Special case for empty strings */
2029 if (PyString_GET_SIZE(self) == 0)
2030 return PyInt_FromLong(0);
2031
Guido van Rossum4c08d552000-03-10 22:55:18 +00002032 e = p + PyString_GET_SIZE(self);
2033 cased = 0;
2034 for (; p < e; p++) {
2035 if (islower(*p))
2036 return PyInt_FromLong(0);
2037 else if (!cased && isupper(*p))
2038 cased = 1;
2039 }
2040 return PyInt_FromLong(cased);
2041}
2042
2043
2044static char istitle__doc__[] =
2045"S.istitle() -> int\n\
2046\n\
2047Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2048may only follow uncased characters and lowercase characters only cased\n\
2049ones. Return 0 otherwise.";
2050
2051static PyObject*
2052string_istitle(PyStringObject *self, PyObject *args)
2053{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002054 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2055 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002056 int cased, previous_is_cased;
2057
2058 if (!PyArg_NoArgs(args))
2059 return NULL;
2060
2061 /* Shortcut for single character strings */
2062 if (PyString_GET_SIZE(self) == 1)
2063 return PyInt_FromLong(isupper(*p) != 0);
2064
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002065 /* Special case for empty strings */
2066 if (PyString_GET_SIZE(self) == 0)
2067 return PyInt_FromLong(0);
2068
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069 e = p + PyString_GET_SIZE(self);
2070 cased = 0;
2071 previous_is_cased = 0;
2072 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002073 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002074
2075 if (isupper(ch)) {
2076 if (previous_is_cased)
2077 return PyInt_FromLong(0);
2078 previous_is_cased = 1;
2079 cased = 1;
2080 }
2081 else if (islower(ch)) {
2082 if (!previous_is_cased)
2083 return PyInt_FromLong(0);
2084 previous_is_cased = 1;
2085 cased = 1;
2086 }
2087 else
2088 previous_is_cased = 0;
2089 }
2090 return PyInt_FromLong(cased);
2091}
2092
2093
2094static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002095"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096\n\
2097Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002098Line breaks are not included in the resulting list unless keepends\n\
2099is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002100
2101#define SPLIT_APPEND(data, left, right) \
2102 str = PyString_FromStringAndSize(data + left, right - left); \
2103 if (!str) \
2104 goto onError; \
2105 if (PyList_Append(list, str)) { \
2106 Py_DECREF(str); \
2107 goto onError; \
2108 } \
2109 else \
2110 Py_DECREF(str);
2111
2112static PyObject*
2113string_splitlines(PyStringObject *self, PyObject *args)
2114{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002115 register int i;
2116 register int j;
2117 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002118 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002119 PyObject *list;
2120 PyObject *str;
2121 char *data;
2122
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002123 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002124 return NULL;
2125
2126 data = PyString_AS_STRING(self);
2127 len = PyString_GET_SIZE(self);
2128
Guido van Rossum4c08d552000-03-10 22:55:18 +00002129 list = PyList_New(0);
2130 if (!list)
2131 goto onError;
2132
2133 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002134 int eol;
2135
Guido van Rossum4c08d552000-03-10 22:55:18 +00002136 /* Find a line and append it */
2137 while (i < len && data[i] != '\n' && data[i] != '\r')
2138 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002139
2140 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002141 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002142 if (i < len) {
2143 if (data[i] == '\r' && i + 1 < len &&
2144 data[i+1] == '\n')
2145 i += 2;
2146 else
2147 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002148 if (keepends)
2149 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002151 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002152 j = i;
2153 }
2154 if (j < len) {
2155 SPLIT_APPEND(data, j, len);
2156 }
2157
2158 return list;
2159
2160 onError:
2161 Py_DECREF(list);
2162 return NULL;
2163}
2164
2165#undef SPLIT_APPEND
2166
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167
2168static PyMethodDef
2169string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002170 /* Counterparts of the obsolete stropmodule functions; except
2171 string.maketrans(). */
2172 {"join", (PyCFunction)string_join, 1, join__doc__},
2173 {"split", (PyCFunction)string_split, 1, split__doc__},
2174 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2175 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2176 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2177 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2178 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2179 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2180 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2182 {"count", (PyCFunction)string_count, 1, count__doc__},
2183 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2184 {"find", (PyCFunction)string_find, 1, find__doc__},
2185 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2188 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2189 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2190 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2192 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2193 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2195 {"title", (PyCFunction)string_title, 1, title__doc__},
2196 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2197 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2198 {"center", (PyCFunction)string_center, 1, center__doc__},
2199 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2200 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2201#if 0
2202 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2203#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204 {NULL, NULL} /* sentinel */
2205};
2206
2207static PyObject *
2208string_getattr(s, name)
2209 PyStringObject *s;
2210 char *name;
2211{
2212 return Py_FindMethod(string_methods, (PyObject*)s, name);
2213}
2214
2215
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002216PyTypeObject PyString_Type = {
2217 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002218 0,
2219 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002220 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002221 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002222 (destructor)string_dealloc, /*tp_dealloc*/
2223 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002225 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002226 (cmpfunc)string_compare, /*tp_compare*/
2227 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002228 0, /*tp_as_number*/
2229 &string_as_sequence, /*tp_as_sequence*/
2230 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002231 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002232 0, /*tp_call*/
2233 0, /*tp_str*/
2234 0, /*tp_getattro*/
2235 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002236 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002237 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002238 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002239};
2240
2241void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002242PyString_Concat(pv, w)
2243 register PyObject **pv;
2244 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002245{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002246 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002247 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002248 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002249 if (w == NULL || !PyString_Check(*pv)) {
2250 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002251 *pv = NULL;
2252 return;
2253 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002254 v = string_concat((PyStringObject *) *pv, w);
2255 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002256 *pv = v;
2257}
2258
Guido van Rossum013142a1994-08-30 08:19:36 +00002259void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002260PyString_ConcatAndDel(pv, w)
2261 register PyObject **pv;
2262 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002263{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002264 PyString_Concat(pv, w);
2265 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002266}
2267
2268
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002269/* The following function breaks the notion that strings are immutable:
2270 it changes the size of a string. We get away with this only if there
2271 is only one module referencing the object. You can also think of it
2272 as creating a new string object and destroying the old one, only
2273 more efficiently. In any case, don't use this if the string may
2274 already be known to some other part of the code... */
2275
2276int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002277_PyString_Resize(pv, newsize)
2278 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002279 int newsize;
2280{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002281 register PyObject *v;
2282 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002283 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002284 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002285 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002286 Py_DECREF(v);
2287 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002288 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002289 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002290 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002291#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002292 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002293#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002294 _Py_ForgetReference(v);
2295 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002296 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002297 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002298 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002299 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002300 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002301 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002302 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002303 _Py_NewReference(*pv);
2304 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002305 sv->ob_size = newsize;
2306 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002307 return 0;
2308}
Guido van Rossume5372401993-03-16 12:15:04 +00002309
2310/* Helpers for formatstring */
2311
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002312static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002313getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002314 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002315 int arglen;
2316 int *p_argidx;
2317{
2318 int argidx = *p_argidx;
2319 if (argidx < arglen) {
2320 (*p_argidx)++;
2321 if (arglen < 0)
2322 return args;
2323 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002324 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002325 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002326 PyErr_SetString(PyExc_TypeError,
2327 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002328 return NULL;
2329}
2330
2331#define F_LJUST (1<<0)
2332#define F_SIGN (1<<1)
2333#define F_BLANK (1<<2)
2334#define F_ALT (1<<3)
2335#define F_ZERO (1<<4)
2336
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002337static int
2338formatfloat(buf, flags, prec, type, v)
2339 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002340 int flags;
2341 int prec;
2342 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002343 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002344{
2345 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002346 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002347 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002348 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002349 if (prec < 0)
2350 prec = 6;
2351 if (prec > 50)
2352 prec = 50; /* Arbitrary limitation */
2353 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2354 type = 'g';
2355 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2356 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002357 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002358}
2359
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002360static int
2361formatint(buf, flags, prec, type, v)
2362 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002363 int flags;
2364 int prec;
2365 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002366 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002367{
2368 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002369 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002370 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002371 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002372 if (prec < 0)
2373 prec = 1;
2374 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2375 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002376 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002377}
2378
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002379static int
2380formatchar(buf, v)
2381 char *buf;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002382 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002383{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002384 if (PyString_Check(v)) {
2385 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002386 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002387 }
2388 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002389 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002390 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002391 }
2392 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002393 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002394}
2395
Guido van Rossum013142a1994-08-30 08:19:36 +00002396
Guido van Rossume5372401993-03-16 12:15:04 +00002397/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
2398
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002399PyObject *
2400PyString_Format(format, args)
2401 PyObject *format;
2402 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002403{
2404 char *fmt, *res;
2405 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002406 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002407 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002408 PyObject *dict = NULL;
2409 if (format == NULL || !PyString_Check(format) || args == NULL) {
2410 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002411 return NULL;
2412 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002413 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002414 fmt = PyString_AsString(format);
2415 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002416 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002417 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002418 if (result == NULL)
2419 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002420 res = PyString_AsString(result);
2421 if (PyTuple_Check(args)) {
2422 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002423 argidx = 0;
2424 }
2425 else {
2426 arglen = -1;
2427 argidx = -2;
2428 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002429 if (args->ob_type->tp_as_mapping)
2430 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002431 while (--fmtcnt >= 0) {
2432 if (*fmt != '%') {
2433 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002434 rescnt = fmtcnt + 100;
2435 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002436 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002437 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002438 res = PyString_AsString(result)
2439 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002440 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002441 }
2442 *res++ = *fmt++;
2443 }
2444 else {
2445 /* Got a format specifier */
2446 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002447 int width = -1;
2448 int prec = -1;
2449 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002450 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002451 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002452 PyObject *v = NULL;
2453 PyObject *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +00002454 char *buf;
2455 int sign;
2456 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002457 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002458 char *fmt_start = fmt;
2459
Guido van Rossumda9c2711996-12-05 21:58:58 +00002460 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002461 if (*fmt == '(') {
2462 char *keystart;
2463 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002464 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002465 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002466
2467 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002468 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002469 "format requires a mapping");
2470 goto error;
2471 }
2472 ++fmt;
2473 --fmtcnt;
2474 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002475 /* Skip over balanced parentheses */
2476 while (pcount > 0 && --fmtcnt >= 0) {
2477 if (*fmt == ')')
2478 --pcount;
2479 else if (*fmt == '(')
2480 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002481 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002482 }
2483 keylen = fmt - keystart - 1;
2484 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002485 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002486 "incomplete format key");
2487 goto error;
2488 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002489 key = PyString_FromStringAndSize(keystart,
2490 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002491 if (key == NULL)
2492 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002493 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002494 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002495 args_owned = 0;
2496 }
2497 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002498 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002499 if (args == NULL) {
2500 goto error;
2501 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002502 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002503 arglen = -1;
2504 argidx = -2;
2505 }
Guido van Rossume5372401993-03-16 12:15:04 +00002506 while (--fmtcnt >= 0) {
2507 switch (c = *fmt++) {
2508 case '-': flags |= F_LJUST; continue;
2509 case '+': flags |= F_SIGN; continue;
2510 case ' ': flags |= F_BLANK; continue;
2511 case '#': flags |= F_ALT; continue;
2512 case '0': flags |= F_ZERO; continue;
2513 }
2514 break;
2515 }
2516 if (c == '*') {
2517 v = getnextarg(args, arglen, &argidx);
2518 if (v == NULL)
2519 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002520 if (!PyInt_Check(v)) {
2521 PyErr_SetString(PyExc_TypeError,
2522 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002523 goto error;
2524 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002525 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002526 if (width < 0) {
2527 flags |= F_LJUST;
2528 width = -width;
2529 }
Guido van Rossume5372401993-03-16 12:15:04 +00002530 if (--fmtcnt >= 0)
2531 c = *fmt++;
2532 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002533 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002534 width = c - '0';
2535 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002536 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002537 if (!isdigit(c))
2538 break;
2539 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002540 PyErr_SetString(
2541 PyExc_ValueError,
2542 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002543 goto error;
2544 }
2545 width = width*10 + (c - '0');
2546 }
2547 }
2548 if (c == '.') {
2549 prec = 0;
2550 if (--fmtcnt >= 0)
2551 c = *fmt++;
2552 if (c == '*') {
2553 v = getnextarg(args, arglen, &argidx);
2554 if (v == NULL)
2555 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002556 if (!PyInt_Check(v)) {
2557 PyErr_SetString(
2558 PyExc_TypeError,
2559 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002560 goto error;
2561 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002562 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002563 if (prec < 0)
2564 prec = 0;
2565 if (--fmtcnt >= 0)
2566 c = *fmt++;
2567 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002568 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002569 prec = c - '0';
2570 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002571 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002572 if (!isdigit(c))
2573 break;
2574 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002575 PyErr_SetString(
2576 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002577 "prec too big");
2578 goto error;
2579 }
2580 prec = prec*10 + (c - '0');
2581 }
2582 }
2583 } /* prec */
2584 if (fmtcnt >= 0) {
2585 if (c == 'h' || c == 'l' || c == 'L') {
2586 size = c;
2587 if (--fmtcnt >= 0)
2588 c = *fmt++;
2589 }
2590 }
2591 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002592 PyErr_SetString(PyExc_ValueError,
2593 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002594 goto error;
2595 }
2596 if (c != '%') {
2597 v = getnextarg(args, arglen, &argidx);
2598 if (v == NULL)
2599 goto error;
2600 }
2601 sign = 0;
2602 fill = ' ';
2603 switch (c) {
2604 case '%':
2605 buf = "%";
2606 len = 1;
2607 break;
2608 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002609 case 'r':
2610 if (PyUnicode_Check(v)) {
2611 fmt = fmt_start;
2612 goto unicode;
2613 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002614 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002615 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002616 else
2617 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002618 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002619 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002620 if (!PyString_Check(temp)) {
2621 PyErr_SetString(PyExc_TypeError,
2622 "%s argument has non-string str()");
2623 goto error;
2624 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002625 buf = PyString_AsString(temp);
2626 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002627 if (prec >= 0 && len > prec)
2628 len = prec;
2629 break;
2630 case 'i':
2631 case 'd':
2632 case 'u':
2633 case 'o':
2634 case 'x':
2635 case 'X':
2636 if (c == 'i')
2637 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002638 buf = tmpbuf;
2639 len = formatint(buf, flags, prec, c, v);
2640 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002641 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002642 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002643 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002644 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002645 if ((flags&F_ALT) &&
2646 (c == 'x' || c == 'X') &&
2647 buf[0] == '0' && buf[1] == c) {
2648 *res++ = *buf++;
2649 *res++ = *buf++;
2650 rescnt -= 2;
2651 len -= 2;
2652 width -= 2;
2653 if (width < 0)
2654 width = 0;
2655 }
2656 }
Guido van Rossume5372401993-03-16 12:15:04 +00002657 break;
2658 case 'e':
2659 case 'E':
2660 case 'f':
2661 case 'g':
2662 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002663 buf = tmpbuf;
2664 len = formatfloat(buf, flags, prec, c, v);
2665 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002666 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002667 sign = 1;
2668 if (flags&F_ZERO)
2669 fill = '0';
2670 break;
2671 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002672 buf = tmpbuf;
2673 len = formatchar(buf, v);
2674 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002675 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002676 break;
2677 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002678 PyErr_Format(PyExc_ValueError,
2679 "unsupported format character '%c' (0x%x)",
2680 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002681 goto error;
2682 }
2683 if (sign) {
2684 if (*buf == '-' || *buf == '+') {
2685 sign = *buf++;
2686 len--;
2687 }
2688 else if (flags & F_SIGN)
2689 sign = '+';
2690 else if (flags & F_BLANK)
2691 sign = ' ';
2692 else
2693 sign = '\0';
2694 }
2695 if (width < len)
2696 width = len;
2697 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002698 reslen -= rescnt;
2699 rescnt = width + fmtcnt + 100;
2700 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002701 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002702 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002703 res = PyString_AsString(result)
2704 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002705 }
2706 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002707 if (fill != ' ')
2708 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002709 rescnt--;
2710 if (width > len)
2711 width--;
2712 }
2713 if (width > len && !(flags&F_LJUST)) {
2714 do {
2715 --rescnt;
2716 *res++ = fill;
2717 } while (--width > len);
2718 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002719 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002720 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002721 memcpy(res, buf, len);
2722 res += len;
2723 rescnt -= len;
2724 while (--width >= len) {
2725 --rescnt;
2726 *res++ = ' ';
2727 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002728 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002729 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002730 "not all arguments converted");
2731 goto error;
2732 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002733 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002734 } /* '%' */
2735 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002736 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002737 PyErr_SetString(PyExc_TypeError,
2738 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002739 goto error;
2740 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002741 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002742 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002743 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002744 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002745 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002746
2747 unicode:
2748 if (args_owned) {
2749 Py_DECREF(args);
2750 args_owned = 0;
2751 }
2752 /* Fiddle args right (remove the first argidx-1 arguments) */
2753 --argidx;
2754 if (PyTuple_Check(orig_args) && argidx > 0) {
2755 PyObject *v;
2756 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2757 v = PyTuple_New(n);
2758 if (v == NULL)
2759 goto error;
2760 while (--n >= 0) {
2761 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2762 Py_INCREF(w);
2763 PyTuple_SET_ITEM(v, n, w);
2764 }
2765 args = v;
2766 } else {
2767 Py_INCREF(orig_args);
2768 args = orig_args;
2769 }
2770 /* Paste rest of format string to what we have of the result
2771 string; we reuse result for this */
2772 rescnt = res - PyString_AS_STRING(result);
2773 fmtcnt = PyString_GET_SIZE(format) - \
2774 (fmt - PyString_AS_STRING(format));
2775 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2776 Py_DECREF(args);
2777 goto error;
2778 }
2779 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2780 format = result;
2781 /* Let Unicode do its magic */
2782 result = PyUnicode_Format(format, args);
2783 Py_DECREF(format);
2784 Py_DECREF(args);
2785 return result;
2786
Guido van Rossume5372401993-03-16 12:15:04 +00002787 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002788 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002789 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002791 }
Guido van Rossume5372401993-03-16 12:15:04 +00002792 return NULL;
2793}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002794
2795
2796#ifdef INTERN_STRINGS
2797
2798static PyObject *interned;
2799
2800void
2801PyString_InternInPlace(p)
2802 PyObject **p;
2803{
2804 register PyStringObject *s = (PyStringObject *)(*p);
2805 PyObject *t;
2806 if (s == NULL || !PyString_Check(s))
2807 Py_FatalError("PyString_InternInPlace: strings only please!");
2808 if ((t = s->ob_sinterned) != NULL) {
2809 if (t == (PyObject *)s)
2810 return;
2811 Py_INCREF(t);
2812 *p = t;
2813 Py_DECREF(s);
2814 return;
2815 }
2816 if (interned == NULL) {
2817 interned = PyDict_New();
2818 if (interned == NULL)
2819 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002820 }
2821 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2822 Py_INCREF(t);
2823 *p = s->ob_sinterned = t;
2824 Py_DECREF(s);
2825 return;
2826 }
2827 t = (PyObject *)s;
2828 if (PyDict_SetItem(interned, t, t) == 0) {
2829 s->ob_sinterned = t;
2830 return;
2831 }
2832 PyErr_Clear();
2833}
2834
2835
2836PyObject *
2837PyString_InternFromString(cp)
2838 const char *cp;
2839{
2840 PyObject *s = PyString_FromString(cp);
2841 if (s == NULL)
2842 return NULL;
2843 PyString_InternInPlace(&s);
2844 return s;
2845}
2846
2847#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002848
2849void
2850PyString_Fini()
2851{
2852 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002853 for (i = 0; i < UCHAR_MAX + 1; i++) {
2854 Py_XDECREF(characters[i]);
2855 characters[i] = NULL;
2856 }
2857#ifndef DONT_SHARE_SHORT_STRINGS
2858 Py_XDECREF(nullstring);
2859 nullstring = NULL;
2860#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002861#ifdef INTERN_STRINGS
2862 if (interned) {
2863 int pos, changed;
2864 PyObject *key, *value;
2865 do {
2866 changed = 0;
2867 pos = 0;
2868 while (PyDict_Next(interned, &pos, &key, &value)) {
2869 if (key->ob_refcnt == 2 && key == value) {
2870 PyDict_DelItem(interned, key);
2871 changed = 1;
2872 }
2873 }
2874 } while (changed);
2875 }
2876#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002877}