blob: ce6548b6f64d044081d4c6005c333476fd6f7dcd [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum71160aa1997-06-03 18:03:18 +000036#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000037#include <ctype.h>
38
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000039#ifdef COUNT_ALLOCS
40int null_strings, one_strings;
41#endif
42
Guido van Rossum03093a21994-09-28 15:51:32 +000043#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000044#include <limits.h>
45#else
46#ifndef UCHAR_MAX
47#define UCHAR_MAX 255
48#endif
49#endif
50
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000052#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   PyString_FromString() this is always the case, for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071PyObject *
72PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000073 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074 int size;
75{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000077#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
92 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000095
96 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000098 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000099 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000101 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102#ifdef CACHE_HASH
103 op->ob_shash = -1;
104#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000105#ifdef INTERN_STRINGS
106 op->ob_sinterned = NULL;
107#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000108 if (str != NULL)
109 memcpy(op->ob_sval, str, size);
110 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0) {
113 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 } else if (size == 1 && str != NULL) {
116 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121}
122
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123PyObject *
124PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000125 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126{
127 register unsigned int size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000129#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 if (size == 0 && (op = nullstring) != NULL) {
131#ifdef COUNT_ALLOCS
132 null_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138#ifdef COUNT_ALLOCS
139 one_strings++;
140#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000144#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145
146 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000148 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000149 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000151 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152#ifdef CACHE_HASH
153 op->ob_shash = -1;
154#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000155#ifdef INTERN_STRINGS
156 op->ob_sinterned = NULL;
157#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000158 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000159#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 if (size == 0) {
161 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000162 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000163 } else if (size == 1) {
164 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000165 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000166 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000167#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000168 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169}
170
Guido van Rossum234f9421993-06-17 12:35:49 +0000171static void
Guido van Rossume5372401993-03-16 12:15:04 +0000172string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000173 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000174{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000175 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000176}
177
Guido van Rossumd7047b31995-01-02 19:07:15 +0000178int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000179PyString_Size(op)
180 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000182 if (!PyString_Check(op)) {
183 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184 return -1;
185 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000186 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000187}
188
189/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000190PyString_AsString(op)
191 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000193 if (!PyString_Check(op)) {
194 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 return NULL;
196 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000197 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198}
199
200/* Methods */
201
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000202static int
Guido van Rossume5372401993-03-16 12:15:04 +0000203string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000204 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 FILE *fp;
206 int flags;
207{
208 int i;
209 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000210 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000212 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000214 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216
217 /* figure out which quote to use; single is prefered */
218 quote = '\'';
219 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
220 quote = '"';
221
222 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 for (i = 0; i < op->ob_size; i++) {
224 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 fprintf(fp, "\\%c", c);
227 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000228 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000232 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000233 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234}
235
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000236static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000237string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239{
240 /* XXX overflow? */
241 int newsize = 2 + 4 * op->ob_size * sizeof(char);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000242 PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000244 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000245 }
246 else {
247 register int i;
248 register char c;
249 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000250 int quote;
251
252 /* figure out which quote to use; single is prefered */
253 quote = '\'';
254 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
255 quote = '"';
256
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000257 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 for (i = 0; i < op->ob_size; i++) {
260 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 *p++ = '\\', *p++ = c;
263 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000264 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 while (*p != '\0')
266 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 }
268 else
269 *p++ = c;
270 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000271 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000273 _PyString_Resize(
274 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000275 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277}
278
279static int
Guido van Rossume5372401993-03-16 12:15:04 +0000280string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282{
283 return a->ob_size;
284}
285
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000286static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000287string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000288 register PyStringObject *a;
289 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290{
291 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 register PyStringObject *op;
293 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000294 if (PyUnicode_Check(bb))
295 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000296 PyErr_Format(PyExc_TypeError,
297 "cannot add type \"%.200s\" to string",
298 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000299 return NULL;
300 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000301#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000302 /* Optimize cases with empty left or right operand */
303 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000304 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000305 return bb;
306 }
307 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000308 Py_INCREF(a);
309 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000310 }
311 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000312 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000313 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000314 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000315 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000316 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000317 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000318#ifdef CACHE_HASH
319 op->ob_shash = -1;
320#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000321#ifdef INTERN_STRINGS
322 op->ob_sinterned = NULL;
323#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000324 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
325 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
326 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000327 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000328#undef b
329}
330
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000331static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000332string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000333 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 register int n;
335{
336 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000337 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000338 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339 if (n < 0)
340 n = 0;
341 size = a->ob_size * n;
342 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000343 Py_INCREF(a);
344 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000345 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000346 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000348 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000349 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000350 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000351 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000352#ifdef CACHE_HASH
353 op->ob_shash = -1;
354#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000355#ifdef INTERN_STRINGS
356 op->ob_sinterned = NULL;
357#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000358 for (i = 0; i < size; i += a->ob_size)
359 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
360 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000361 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362}
363
364/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
365
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000367string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000368 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000369 register int i, j; /* May be negative! */
370{
371 if (i < 0)
372 i = 0;
373 if (j < 0)
374 j = 0; /* Avoid signed/unsigned bug in next line */
375 if (j > a->ob_size)
376 j = a->ob_size;
377 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000378 Py_INCREF(a);
379 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380 }
381 if (j < i)
382 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000383 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000384}
385
Guido van Rossum9284a572000-03-07 15:53:43 +0000386static int
387string_contains(a, el)
388PyObject *a, *el;
389{
390 register char *s, *end;
391 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000392 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000393 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000394 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000395 PyErr_SetString(PyExc_TypeError,
396 "string member test needs char left operand");
397 return -1;
398 }
399 c = PyString_AsString(el)[0];
400 s = PyString_AsString(a);
401 end = s + PyString_Size(a);
402 while (s < end) {
403 if (c == *s++)
404 return 1;
405 }
406 return 0;
407}
408
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000410string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000411 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 register int i;
413{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000414 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000418 return NULL;
419 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000420 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000422#ifdef COUNT_ALLOCS
423 if (v != NULL)
424 one_strings++;
425#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000426 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000428 if (v == NULL)
429 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430 characters[c] = (PyStringObject *) v;
431 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000432 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000434 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435}
436
437static int
Guido van Rossume5372401993-03-16 12:15:04 +0000438string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440{
Guido van Rossum253919f1991-02-13 23:18:39 +0000441 int len_a = a->ob_size, len_b = b->ob_size;
442 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000443 int cmp;
444 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000445 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000446 if (cmp == 0)
447 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
448 if (cmp != 0)
449 return cmp;
450 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000451 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000452}
453
Guido van Rossum9bfef441993-03-29 10:43:31 +0000454static long
455string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000456 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000457{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000458 register int len;
459 register unsigned char *p;
460 register long x;
461
462#ifdef CACHE_HASH
463 if (a->ob_shash != -1)
464 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000465#ifdef INTERN_STRINGS
466 if (a->ob_sinterned != NULL)
467 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000468 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000469#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000470#endif
471 len = a->ob_size;
472 p = (unsigned char *) a->ob_sval;
473 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000474 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000475 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000476 x ^= a->ob_size;
477 if (x == -1)
478 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000479#ifdef CACHE_HASH
480 a->ob_shash = x;
481#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000482 return x;
483}
484
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000485static int
486string_buffer_getreadbuf(self, index, ptr)
487 PyStringObject *self;
488 int index;
489 const void **ptr;
490{
491 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000492 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000493 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000494 return -1;
495 }
496 *ptr = (void *)self->ob_sval;
497 return self->ob_size;
498}
499
500static int
501string_buffer_getwritebuf(self, index, ptr)
502 PyStringObject *self;
503 int index;
504 const void **ptr;
505{
Guido van Rossum045e6881997-09-08 18:30:11 +0000506 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000507 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000508 return -1;
509}
510
511static int
512string_buffer_getsegcount(self, lenp)
513 PyStringObject *self;
514 int *lenp;
515{
516 if ( lenp )
517 *lenp = self->ob_size;
518 return 1;
519}
520
Guido van Rossum1db70701998-10-08 02:18:52 +0000521static int
522string_buffer_getcharbuf(self, index, ptr)
523 PyStringObject *self;
524 int index;
525 const char **ptr;
526{
527 if ( index != 0 ) {
528 PyErr_SetString(PyExc_SystemError,
529 "accessing non-existent string segment");
530 return -1;
531 }
532 *ptr = self->ob_sval;
533 return self->ob_size;
534}
535
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000536static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000537 (inquiry)string_length, /*sq_length*/
538 (binaryfunc)string_concat, /*sq_concat*/
539 (intargfunc)string_repeat, /*sq_repeat*/
540 (intargfunc)string_item, /*sq_item*/
541 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000542 0, /*sq_ass_item*/
543 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000544 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000545};
546
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000547static PyBufferProcs string_as_buffer = {
548 (getreadbufferproc)string_buffer_getreadbuf,
549 (getwritebufferproc)string_buffer_getwritebuf,
550 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000551 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000552};
553
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000554
555
556#define LEFTSTRIP 0
557#define RIGHTSTRIP 1
558#define BOTHSTRIP 2
559
560
561static PyObject *
562split_whitespace(s, len, maxsplit)
563 char *s;
564 int len;
565 int maxsplit;
566{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000567 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000568 PyObject* item;
569 PyObject *list = PyList_New(0);
570
571 if (list == NULL)
572 return NULL;
573
Guido van Rossum4c08d552000-03-10 22:55:18 +0000574 for (i = j = 0; i < len; ) {
575 while (i < len && isspace(Py_CHARMASK(s[i])))
576 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000577 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000578 while (i < len && !isspace(Py_CHARMASK(s[i])))
579 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000580 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000581 if (maxsplit-- <= 0)
582 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000583 item = PyString_FromStringAndSize(s+j, (int)(i-j));
584 if (item == NULL)
585 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000586 err = PyList_Append(list, item);
587 Py_DECREF(item);
588 if (err < 0)
589 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000590 while (i < len && isspace(Py_CHARMASK(s[i])))
591 i++;
592 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000593 }
594 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000595 if (j < len) {
596 item = PyString_FromStringAndSize(s+j, (int)(len - j));
597 if (item == NULL)
598 goto finally;
599 err = PyList_Append(list, item);
600 Py_DECREF(item);
601 if (err < 0)
602 goto finally;
603 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000604 return list;
605 finally:
606 Py_DECREF(list);
607 return NULL;
608}
609
610
611static char split__doc__[] =
612"S.split([sep [,maxsplit]]) -> list of strings\n\
613\n\
614Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000615delimiter string. If maxsplit is given, at most maxsplit\n\
616splits are done. If sep is not specified, any whitespace string\n\
617is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000618
619static PyObject *
620string_split(self, args)
621 PyStringObject *self;
622 PyObject *args;
623{
624 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 int maxsplit = -1;
626 const char *s = PyString_AS_STRING(self), *sub;
627 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000628
Guido van Rossum4c08d552000-03-10 22:55:18 +0000629 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000631 if (maxsplit < 0)
632 maxsplit = INT_MAX;
633 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000634 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000635 if (PyString_Check(subobj)) {
636 sub = PyString_AS_STRING(subobj);
637 n = PyString_GET_SIZE(subobj);
638 }
639 else if (PyUnicode_Check(subobj))
640 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
641 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
642 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000643 if (n == 0) {
644 PyErr_SetString(PyExc_ValueError, "empty separator");
645 return NULL;
646 }
647
648 list = PyList_New(0);
649 if (list == NULL)
650 return NULL;
651
652 i = j = 0;
653 while (i+n <= len) {
654 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000655 if (maxsplit-- <= 0)
656 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000657 item = PyString_FromStringAndSize(s+j, (int)(i-j));
658 if (item == NULL)
659 goto fail;
660 err = PyList_Append(list, item);
661 Py_DECREF(item);
662 if (err < 0)
663 goto fail;
664 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665 }
666 else
667 i++;
668 }
669 item = PyString_FromStringAndSize(s+j, (int)(len-j));
670 if (item == NULL)
671 goto fail;
672 err = PyList_Append(list, item);
673 Py_DECREF(item);
674 if (err < 0)
675 goto fail;
676
677 return list;
678
679 fail:
680 Py_DECREF(list);
681 return NULL;
682}
683
684
685static char join__doc__[] =
686"S.join(sequence) -> string\n\
687\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000688Return a string which is the concatenation of the strings in the\n\
689sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690
691static PyObject *
692string_join(self, args)
693 PyStringObject *self;
694 PyObject *args;
695{
696 char *sep = PyString_AS_STRING(self);
697 int seplen = PyString_GET_SIZE(self);
698 PyObject *res = NULL;
699 int reslen = 0;
700 char *p;
701 int seqlen = 0;
702 int sz = 100;
703 int i, slen;
704 PyObject *seq;
705
Guido van Rossum43713e52000-02-29 13:59:29 +0000706 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000707 return NULL;
708
709 seqlen = PySequence_Length(seq);
710 if (seqlen < 0 && PyErr_Occurred())
711 return NULL;
712
713 if (seqlen == 1) {
714 /* Optimization if there's only one item */
715 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000716 if (item == NULL)
717 return NULL;
718 if (!PyString_Check(item) &&
719 !PyUnicode_Check(item)) {
720 PyErr_SetString(PyExc_TypeError,
721 "first argument must be sequence of strings");
722 Py_DECREF(item);
723 return NULL;
724 }
725 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000726 }
727 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
728 return NULL;
729 p = PyString_AsString(res);
730
731 /* optimize for lists. all others (tuples and arbitrary sequences)
732 * just use the abstract interface.
733 */
734 if (PyList_Check(seq)) {
735 for (i = 0; i < seqlen; i++) {
736 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (!PyString_Check(item)){
738 if (PyUnicode_Check(item)) {
739 Py_DECREF(res);
740 return PyUnicode_Join(
741 (PyObject *)self,
742 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000743 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000744 PyErr_Format(PyExc_TypeError,
745 "sequence item %i not a string",
746 i);
747 goto finally;
748 }
749 slen = PyString_GET_SIZE(item);
750 while (reslen + slen + seplen >= sz) {
751 if (_PyString_Resize(&res, sz*2))
752 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000753 sz *= 2;
754 p = PyString_AsString(res) + reslen;
755 }
756 if (i > 0) {
757 memcpy(p, sep, seplen);
758 p += seplen;
759 reslen += seplen;
760 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000761 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 p += slen;
763 reslen += slen;
764 }
765 }
766 else {
767 for (i = 0; i < seqlen; i++) {
768 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000769 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000770 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000771 if (!PyString_Check(item)){
772 if (PyUnicode_Check(item)) {
773 Py_DECREF(res);
774 Py_DECREF(item);
775 return PyUnicode_Join(
776 (PyObject *)self,
777 seq);
778 }
779 Py_DECREF(item);
780 PyErr_Format(PyExc_TypeError,
781 "sequence item %i not a string",
782 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000783 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000784 }
785 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000786 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000787 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000788 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000790 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000791 sz *= 2;
792 p = PyString_AsString(res) + reslen;
793 }
794 if (i > 0) {
795 memcpy(p, sep, seplen);
796 p += seplen;
797 reslen += seplen;
798 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000799 memcpy(p, PyString_AS_STRING(item), slen);
800 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000801 p += slen;
802 reslen += slen;
803 }
804 }
805 if (_PyString_Resize(&res, reslen))
806 goto finally;
807 return res;
808
809 finally:
810 Py_DECREF(res);
811 return NULL;
812}
813
814
815
816static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000818 PyStringObject *self;
819 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000820 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000822 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000823 int len = PyString_GET_SIZE(self);
824 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000825 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000826
Guido van Rossumc6821402000-05-08 14:08:05 +0000827 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
828 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000829 return -2;
830 if (PyString_Check(subobj)) {
831 sub = PyString_AS_STRING(subobj);
832 n = PyString_GET_SIZE(subobj);
833 }
834 else if (PyUnicode_Check(subobj))
835 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
836 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000837 return -2;
838
839 if (last > len)
840 last = len;
841 if (last < 0)
842 last += len;
843 if (last < 0)
844 last = 0;
845 if (i < 0)
846 i += len;
847 if (i < 0)
848 i = 0;
849
Guido van Rossum4c08d552000-03-10 22:55:18 +0000850 if (dir > 0) {
851 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000852 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000853 last -= n;
854 for (; i <= last; ++i)
855 if (s[i] == sub[0] &&
856 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
857 return (long)i;
858 }
859 else {
860 int j;
861
862 if (n == 0 && i <= last)
863 return (long)last;
864 for (j = last-n; j >= i; --j)
865 if (s[j] == sub[0] &&
866 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
867 return (long)j;
868 }
869
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000870 return -1;
871}
872
873
/* Docstring for S.find(). */
static char find__doc__[] =
"S.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
882
883static PyObject *
884string_find(self, args)
885 PyStringObject *self;
886 PyObject *args;
887{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000888 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889 if (result == -2)
890 return NULL;
891 return PyInt_FromLong(result);
892}
893
894
895static char index__doc__[] =
896"S.index(sub [,start [,end]]) -> int\n\
897\n\
898Like S.find() but raise ValueError when the substring is not found.";
899
900static PyObject *
901string_index(self, args)
902 PyStringObject *self;
903 PyObject *args;
904{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000905 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000906 if (result == -2)
907 return NULL;
908 if (result == -1) {
909 PyErr_SetString(PyExc_ValueError,
910 "substring not found in string.index");
911 return NULL;
912 }
913 return PyInt_FromLong(result);
914}
915
916
/* Docstring for S.rfind(). */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
925
926static PyObject *
927string_rfind(self, args)
928 PyStringObject *self;
929 PyObject *args;
930{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000931 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000932 if (result == -2)
933 return NULL;
934 return PyInt_FromLong(result);
935}
936
937
938static char rindex__doc__[] =
939"S.rindex(sub [,start [,end]]) -> int\n\
940\n\
941Like S.rfind() but raise ValueError when the substring is not found.";
942
943static PyObject *
944string_rindex(self, args)
945 PyStringObject *self;
946 PyObject *args;
947{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000949 if (result == -2)
950 return NULL;
951 if (result == -1) {
952 PyErr_SetString(PyExc_ValueError,
953 "substring not found in string.rindex");
954 return NULL;
955 }
956 return PyInt_FromLong(result);
957}
958
959
960static PyObject *
961do_strip(self, args, striptype)
962 PyStringObject *self;
963 PyObject *args;
964 int striptype;
965{
966 char *s = PyString_AS_STRING(self);
967 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000968
Guido van Rossum43713e52000-02-29 13:59:29 +0000969 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000970 return NULL;
971
972 i = 0;
973 if (striptype != RIGHTSTRIP) {
974 while (i < len && isspace(Py_CHARMASK(s[i]))) {
975 i++;
976 }
977 }
978
979 j = len;
980 if (striptype != LEFTSTRIP) {
981 do {
982 j--;
983 } while (j >= i && isspace(Py_CHARMASK(s[j])));
984 j++;
985 }
986
987 if (i == 0 && j == len) {
988 Py_INCREF(self);
989 return (PyObject*)self;
990 }
991 else
992 return PyString_FromStringAndSize(s+i, j-i);
993}
994
995
996static char strip__doc__[] =
997"S.strip() -> string\n\
998\n\
999Return a copy of the string S with leading and trailing\n\
1000whitespace removed.";
1001
1002static PyObject *
1003string_strip(self, args)
1004 PyStringObject *self;
1005 PyObject *args;
1006{
1007 return do_strip(self, args, BOTHSTRIP);
1008}
1009
1010
1011static char lstrip__doc__[] =
1012"S.lstrip() -> string\n\
1013\n\
1014Return a copy of the string S with leading whitespace removed.";
1015
1016static PyObject *
1017string_lstrip(self, args)
1018 PyStringObject *self;
1019 PyObject *args;
1020{
1021 return do_strip(self, args, LEFTSTRIP);
1022}
1023
1024
1025static char rstrip__doc__[] =
1026"S.rstrip() -> string\n\
1027\n\
1028Return a copy of the string S with trailing whitespace removed.";
1029
1030static PyObject *
1031string_rstrip(self, args)
1032 PyStringObject *self;
1033 PyObject *args;
1034{
1035 return do_strip(self, args, RIGHTSTRIP);
1036}
1037
1038
1039static char lower__doc__[] =
1040"S.lower() -> string\n\
1041\n\
1042Return a copy of the string S converted to lowercase.";
1043
1044static PyObject *
1045string_lower(self, args)
1046 PyStringObject *self;
1047 PyObject *args;
1048{
1049 char *s = PyString_AS_STRING(self), *s_new;
1050 int i, n = PyString_GET_SIZE(self);
1051 PyObject *new;
1052
Guido van Rossum43713e52000-02-29 13:59:29 +00001053 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054 return NULL;
1055 new = PyString_FromStringAndSize(NULL, n);
1056 if (new == NULL)
1057 return NULL;
1058 s_new = PyString_AsString(new);
1059 for (i = 0; i < n; i++) {
1060 int c = Py_CHARMASK(*s++);
1061 if (isupper(c)) {
1062 *s_new = tolower(c);
1063 } else
1064 *s_new = c;
1065 s_new++;
1066 }
1067 return new;
1068}
1069
1070
1071static char upper__doc__[] =
1072"S.upper() -> string\n\
1073\n\
1074Return a copy of the string S converted to uppercase.";
1075
1076static PyObject *
1077string_upper(self, args)
1078 PyStringObject *self;
1079 PyObject *args;
1080{
1081 char *s = PyString_AS_STRING(self), *s_new;
1082 int i, n = PyString_GET_SIZE(self);
1083 PyObject *new;
1084
Guido van Rossum43713e52000-02-29 13:59:29 +00001085 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086 return NULL;
1087 new = PyString_FromStringAndSize(NULL, n);
1088 if (new == NULL)
1089 return NULL;
1090 s_new = PyString_AsString(new);
1091 for (i = 0; i < n; i++) {
1092 int c = Py_CHARMASK(*s++);
1093 if (islower(c)) {
1094 *s_new = toupper(c);
1095 } else
1096 *s_new = c;
1097 s_new++;
1098 }
1099 return new;
1100}
1101
1102
Guido van Rossum4c08d552000-03-10 22:55:18 +00001103static char title__doc__[] =
1104"S.title() -> string\n\
1105\n\
1106Return a titlecased version of S, i.e. words start with uppercase\n\
1107characters, all remaining cased characters have lowercase.";
1108
1109static PyObject*
1110string_title(PyUnicodeObject *self, PyObject *args)
1111{
1112 char *s = PyString_AS_STRING(self), *s_new;
1113 int i, n = PyString_GET_SIZE(self);
1114 int previous_is_cased = 0;
1115 PyObject *new;
1116
1117 if (!PyArg_ParseTuple(args, ":title"))
1118 return NULL;
1119 new = PyString_FromStringAndSize(NULL, n);
1120 if (new == NULL)
1121 return NULL;
1122 s_new = PyString_AsString(new);
1123 for (i = 0; i < n; i++) {
1124 int c = Py_CHARMASK(*s++);
1125 if (islower(c)) {
1126 if (!previous_is_cased)
1127 c = toupper(c);
1128 previous_is_cased = 1;
1129 } else if (isupper(c)) {
1130 if (previous_is_cased)
1131 c = tolower(c);
1132 previous_is_cased = 1;
1133 } else
1134 previous_is_cased = 0;
1135 *s_new++ = c;
1136 }
1137 return new;
1138}
1139
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140static char capitalize__doc__[] =
1141"S.capitalize() -> string\n\
1142\n\
1143Return a copy of the string S with only its first character\n\
1144capitalized.";
1145
1146static PyObject *
1147string_capitalize(self, args)
1148 PyStringObject *self;
1149 PyObject *args;
1150{
1151 char *s = PyString_AS_STRING(self), *s_new;
1152 int i, n = PyString_GET_SIZE(self);
1153 PyObject *new;
1154
Guido van Rossum43713e52000-02-29 13:59:29 +00001155 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001156 return NULL;
1157 new = PyString_FromStringAndSize(NULL, n);
1158 if (new == NULL)
1159 return NULL;
1160 s_new = PyString_AsString(new);
1161 if (0 < n) {
1162 int c = Py_CHARMASK(*s++);
1163 if (islower(c))
1164 *s_new = toupper(c);
1165 else
1166 *s_new = c;
1167 s_new++;
1168 }
1169 for (i = 1; i < n; i++) {
1170 int c = Py_CHARMASK(*s++);
1171 if (isupper(c))
1172 *s_new = tolower(c);
1173 else
1174 *s_new = c;
1175 s_new++;
1176 }
1177 return new;
1178}
1179
1180
1181static char count__doc__[] =
1182"S.count(sub[, start[, end]]) -> int\n\
1183\n\
1184Return the number of occurrences of substring sub in string\n\
1185S[start:end]. Optional arguments start and end are\n\
1186interpreted as in slice notation.";
1187
1188static PyObject *
1189string_count(self, args)
1190 PyStringObject *self;
1191 PyObject *args;
1192{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001193 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001194 int len = PyString_GET_SIZE(self), n;
1195 int i = 0, last = INT_MAX;
1196 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001197 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001198
Guido van Rossumc6821402000-05-08 14:08:05 +00001199 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1200 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001201 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001202
Guido van Rossum4c08d552000-03-10 22:55:18 +00001203 if (PyString_Check(subobj)) {
1204 sub = PyString_AS_STRING(subobj);
1205 n = PyString_GET_SIZE(subobj);
1206 }
1207 else if (PyUnicode_Check(subobj))
1208 return PyInt_FromLong(
1209 PyUnicode_Count((PyObject *)self, subobj, i, last));
1210 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1211 return NULL;
1212
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213 if (last > len)
1214 last = len;
1215 if (last < 0)
1216 last += len;
1217 if (last < 0)
1218 last = 0;
1219 if (i < 0)
1220 i += len;
1221 if (i < 0)
1222 i = 0;
1223 m = last + 1 - n;
1224 if (n == 0)
1225 return PyInt_FromLong((long) (m-i));
1226
1227 r = 0;
1228 while (i < m) {
1229 if (!memcmp(s+i, sub, n)) {
1230 r++;
1231 i += n;
1232 } else {
1233 i++;
1234 }
1235 }
1236 return PyInt_FromLong((long) r);
1237}
1238
1239
1240static char swapcase__doc__[] =
1241"S.swapcase() -> string\n\
1242\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001243Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001244converted to lowercase and vice versa.";
1245
1246static PyObject *
1247string_swapcase(self, args)
1248 PyStringObject *self;
1249 PyObject *args;
1250{
1251 char *s = PyString_AS_STRING(self), *s_new;
1252 int i, n = PyString_GET_SIZE(self);
1253 PyObject *new;
1254
Guido van Rossum43713e52000-02-29 13:59:29 +00001255 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001256 return NULL;
1257 new = PyString_FromStringAndSize(NULL, n);
1258 if (new == NULL)
1259 return NULL;
1260 s_new = PyString_AsString(new);
1261 for (i = 0; i < n; i++) {
1262 int c = Py_CHARMASK(*s++);
1263 if (islower(c)) {
1264 *s_new = toupper(c);
1265 }
1266 else if (isupper(c)) {
1267 *s_new = tolower(c);
1268 }
1269 else
1270 *s_new = c;
1271 s_new++;
1272 }
1273 return new;
1274}
1275
1276
1277static char translate__doc__[] =
1278"S.translate(table [,deletechars]) -> string\n\
1279\n\
1280Return a copy of the string S, where all characters occurring\n\
1281in the optional argument deletechars are removed, and the\n\
1282remaining characters have been mapped through the given\n\
1283translation table, which must be a string of length 256.";
1284
1285static PyObject *
1286string_translate(self, args)
1287 PyStringObject *self;
1288 PyObject *args;
1289{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001290 register char *input, *output;
1291 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292 register int i, c, changed = 0;
1293 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001294 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295 int inlen, tablen, dellen = 0;
1296 PyObject *result;
1297 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 if (!PyArg_ParseTuple(args, "O|O:translate",
1301 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001303
1304 if (PyString_Check(tableobj)) {
1305 table1 = PyString_AS_STRING(tableobj);
1306 tablen = PyString_GET_SIZE(tableobj);
1307 }
1308 else if (PyUnicode_Check(tableobj)) {
1309 /* Unicode .translate() does not support the deletechars
1310 parameter; instead a mapping to None will cause characters
1311 to be deleted. */
1312 if (delobj != NULL) {
1313 PyErr_SetString(PyExc_TypeError,
1314 "deletions are implemented differently for unicode");
1315 return NULL;
1316 }
1317 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1318 }
1319 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321
1322 if (delobj != NULL) {
1323 if (PyString_Check(delobj)) {
1324 del_table = PyString_AS_STRING(delobj);
1325 dellen = PyString_GET_SIZE(delobj);
1326 }
1327 else if (PyUnicode_Check(delobj)) {
1328 PyErr_SetString(PyExc_TypeError,
1329 "deletions are implemented differently for unicode");
1330 return NULL;
1331 }
1332 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1333 return NULL;
1334
1335 if (tablen != 256) {
1336 PyErr_SetString(PyExc_ValueError,
1337 "translation table must be 256 characters long");
1338 return NULL;
1339 }
1340 }
1341 else {
1342 del_table = NULL;
1343 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344 }
1345
1346 table = table1;
1347 inlen = PyString_Size(input_obj);
1348 result = PyString_FromStringAndSize((char *)NULL, inlen);
1349 if (result == NULL)
1350 return NULL;
1351 output_start = output = PyString_AsString(result);
1352 input = PyString_AsString(input_obj);
1353
1354 if (dellen == 0) {
1355 /* If no deletions are required, use faster code */
1356 for (i = inlen; --i >= 0; ) {
1357 c = Py_CHARMASK(*input++);
1358 if (Py_CHARMASK((*output++ = table[c])) != c)
1359 changed = 1;
1360 }
1361 if (changed)
1362 return result;
1363 Py_DECREF(result);
1364 Py_INCREF(input_obj);
1365 return input_obj;
1366 }
1367
1368 for (i = 0; i < 256; i++)
1369 trans_table[i] = Py_CHARMASK(table[i]);
1370
1371 for (i = 0; i < dellen; i++)
1372 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1373
1374 for (i = inlen; --i >= 0; ) {
1375 c = Py_CHARMASK(*input++);
1376 if (trans_table[c] != -1)
1377 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1378 continue;
1379 changed = 1;
1380 }
1381 if (!changed) {
1382 Py_DECREF(result);
1383 Py_INCREF(input_obj);
1384 return input_obj;
1385 }
1386 /* Fix the size of the resulting string */
1387 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1388 return NULL;
1389 return result;
1390}
1391
1392
1393/* What follows is used for implementing replace(). Perry Stoll. */
1394
/*
  mymemfind

  A strstr() work-alike for arbitrary, length-delimited memory blocks.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of the match inside MEM, or -1 when
  there is none.  A pattern longer than MEM always yields -1.
*/
static int
mymemfind(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    /* highest offset at which a complete copy of PAT still fits */
    int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before comparing the remainder */
        if (mem[pos] == pat[0]) {
            if (pat_len == 1)
                return pos;
            if (memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
                return pos;
        }
        pos++;
    }
    return -1;
}
1426
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right: mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also
  gives 2.
 */
static int
mymemcnt(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        int pos = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (pos == -1)
            break;
        /* resume the scan just past the match that was found */
        consumed = pos + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
1454
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  COUNT limits the number of replacements; a
   negative COUNT means "no limit".

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if no replacement is to be made, the original string is
   returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
    char *str;
    int len;     /* input string */
    char *pat;
    int pat_len; /* pattern string to find */
    char *sub;
    int sub_len; /* substitution string */
    int count;   /* number of replacements */
    int *out_len;

{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result can be empty (e.g. the whole input replaced by an
       empty SUB).  A zero-byte allocation may legally return NULL,
       which the caller would report as out-of-memory, so always ask
       for at least one byte. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return str;
}
1537
1538
/* Docstring for S.replace().  The optional third argument limits the
 * number of replacements, so it is called "count" here.
 */
static char replace__doc__[] =
"S.replace(old, new[, count]) -> string\n\
\n\
Return a copy of string S with all occurrences of substring\n\
old replaced by new. If the optional argument count is\n\
given, only the first count occurrences are replaced.";
1545
1546static PyObject *
1547string_replace(self, args)
1548 PyStringObject *self;
1549 PyObject *args;
1550{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001551 const char *str = PyString_AS_STRING(self), *sub, *repl;
1552 char *new_s;
1553 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1554 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001556 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001557
Guido van Rossum4c08d552000-03-10 22:55:18 +00001558 if (!PyArg_ParseTuple(args, "OO|i:replace",
1559 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001560 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001561
1562 if (PyString_Check(subobj)) {
1563 sub = PyString_AS_STRING(subobj);
1564 sub_len = PyString_GET_SIZE(subobj);
1565 }
1566 else if (PyUnicode_Check(subobj))
1567 return PyUnicode_Replace((PyObject *)self,
1568 subobj, replobj, count);
1569 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1570 return NULL;
1571
1572 if (PyString_Check(replobj)) {
1573 repl = PyString_AS_STRING(replobj);
1574 repl_len = PyString_GET_SIZE(replobj);
1575 }
1576 else if (PyUnicode_Check(replobj))
1577 return PyUnicode_Replace((PyObject *)self,
1578 subobj, replobj, count);
1579 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1580 return NULL;
1581
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001582 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001583 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001584 return NULL;
1585 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001586 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 if (new_s == NULL) {
1588 PyErr_NoMemory();
1589 return NULL;
1590 }
1591 if (out_len == -1) {
1592 /* we're returning another reference to self */
1593 new = (PyObject*)self;
1594 Py_INCREF(new);
1595 }
1596 else {
1597 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001598 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 }
1600 return new;
1601}
1602
1603
1604static char startswith__doc__[] =
1605"S.startswith(prefix[, start[, end]]) -> int\n\
1606\n\
1607Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1608optional start, test S beginning at that position. With optional end, stop\n\
1609comparing S at that position.";
1610
1611static PyObject *
1612string_startswith(self, args)
1613 PyStringObject *self;
1614 PyObject *args;
1615{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001618 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 int plen;
1620 int start = 0;
1621 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623
Guido van Rossumc6821402000-05-08 14:08:05 +00001624 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1625 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001626 return NULL;
1627 if (PyString_Check(subobj)) {
1628 prefix = PyString_AS_STRING(subobj);
1629 plen = PyString_GET_SIZE(subobj);
1630 }
1631 else if (PyUnicode_Check(subobj))
1632 return PyInt_FromLong(
1633 PyUnicode_Tailmatch((PyObject *)self,
1634 subobj, start, end, -1));
1635 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636 return NULL;
1637
1638 /* adopt Java semantics for index out of range. it is legal for
1639 * offset to be == plen, but this only returns true if prefix is
1640 * the empty string.
1641 */
1642 if (start < 0 || start+plen > len)
1643 return PyInt_FromLong(0);
1644
1645 if (!memcmp(str+start, prefix, plen)) {
1646 /* did the match end after the specified end? */
1647 if (end < 0)
1648 return PyInt_FromLong(1);
1649 else if (end - start < plen)
1650 return PyInt_FromLong(0);
1651 else
1652 return PyInt_FromLong(1);
1653 }
1654 else return PyInt_FromLong(0);
1655}
1656
1657
1658static char endswith__doc__[] =
1659"S.endswith(suffix[, start[, end]]) -> int\n\
1660\n\
1661Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1662optional start, test S beginning at that position. With optional end, stop\n\
1663comparing S at that position.";
1664
1665static PyObject *
1666string_endswith(self, args)
1667 PyStringObject *self;
1668 PyObject *args;
1669{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001670 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001672 const char* suffix;
1673 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674 int start = 0;
1675 int end = -1;
1676 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678
Guido van Rossumc6821402000-05-08 14:08:05 +00001679 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1680 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 return NULL;
1682 if (PyString_Check(subobj)) {
1683 suffix = PyString_AS_STRING(subobj);
1684 slen = PyString_GET_SIZE(subobj);
1685 }
1686 else if (PyUnicode_Check(subobj))
1687 return PyInt_FromLong(
1688 PyUnicode_Tailmatch((PyObject *)self,
1689 subobj, start, end, +1));
1690 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691 return NULL;
1692
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694 return PyInt_FromLong(0);
1695
1696 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001697 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001698
Guido van Rossum4c08d552000-03-10 22:55:18 +00001699 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700 return PyInt_FromLong(1);
1701 else return PyInt_FromLong(0);
1702}
1703
1704
Guido van Rossum4c08d552000-03-10 22:55:18 +00001705static char expandtabs__doc__[] =
1706"S.expandtabs([tabsize]) -> string\n\
1707\n\
1708Return a copy of S where all tab characters are expanded using spaces.\n\
1709If tabsize is not given, a tab size of 8 characters is assumed.";
1710
1711static PyObject*
1712string_expandtabs(PyStringObject *self, PyObject *args)
1713{
1714 const char *e, *p;
1715 char *q;
1716 int i, j;
1717 PyObject *u;
1718 int tabsize = 8;
1719
1720 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1721 return NULL;
1722
1723 /* First pass: determine size of ouput string */
1724 i = j = 0;
1725 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1726 for (p = PyString_AS_STRING(self); p < e; p++)
1727 if (*p == '\t') {
1728 if (tabsize > 0)
1729 j += tabsize - (j % tabsize);
1730 }
1731 else {
1732 j++;
1733 if (*p == '\n' || *p == '\r') {
1734 i += j;
1735 j = 0;
1736 }
1737 }
1738
1739 /* Second pass: create output string and fill it */
1740 u = PyString_FromStringAndSize(NULL, i + j);
1741 if (!u)
1742 return NULL;
1743
1744 j = 0;
1745 q = PyString_AS_STRING(u);
1746
1747 for (p = PyString_AS_STRING(self); p < e; p++)
1748 if (*p == '\t') {
1749 if (tabsize > 0) {
1750 i = tabsize - (j % tabsize);
1751 j += i;
1752 while (i--)
1753 *q++ = ' ';
1754 }
1755 }
1756 else {
1757 j++;
1758 *q++ = *p;
1759 if (*p == '\n' || *p == '\r')
1760 j = 0;
1761 }
1762
1763 return u;
1764}
1765
1766static
1767PyObject *pad(PyStringObject *self,
1768 int left,
1769 int right,
1770 char fill)
1771{
1772 PyObject *u;
1773
1774 if (left < 0)
1775 left = 0;
1776 if (right < 0)
1777 right = 0;
1778
1779 if (left == 0 && right == 0) {
1780 Py_INCREF(self);
1781 return (PyObject *)self;
1782 }
1783
1784 u = PyString_FromStringAndSize(NULL,
1785 left + PyString_GET_SIZE(self) + right);
1786 if (u) {
1787 if (left)
1788 memset(PyString_AS_STRING(u), fill, left);
1789 memcpy(PyString_AS_STRING(u) + left,
1790 PyString_AS_STRING(self),
1791 PyString_GET_SIZE(self));
1792 if (right)
1793 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1794 fill, right);
1795 }
1796
1797 return u;
1798}
1799
1800static char ljust__doc__[] =
1801"S.ljust(width) -> string\n\
1802\n\
1803Return S left justified in a string of length width. Padding is\n\
1804done using spaces.";
1805
1806static PyObject *
1807string_ljust(PyStringObject *self, PyObject *args)
1808{
1809 int width;
1810 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1811 return NULL;
1812
1813 if (PyString_GET_SIZE(self) >= width) {
1814 Py_INCREF(self);
1815 return (PyObject*) self;
1816 }
1817
1818 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1819}
1820
1821
1822static char rjust__doc__[] =
1823"S.rjust(width) -> string\n\
1824\n\
1825Return S right justified in a string of length width. Padding is\n\
1826done using spaces.";
1827
1828static PyObject *
1829string_rjust(PyStringObject *self, PyObject *args)
1830{
1831 int width;
1832 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1833 return NULL;
1834
1835 if (PyString_GET_SIZE(self) >= width) {
1836 Py_INCREF(self);
1837 return (PyObject*) self;
1838 }
1839
1840 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1841}
1842
1843
1844static char center__doc__[] =
1845"S.center(width) -> string\n\
1846\n\
1847Return S centered in a string of length width. Padding is done\n\
1848using spaces.";
1849
1850static PyObject *
1851string_center(PyStringObject *self, PyObject *args)
1852{
1853 int marg, left;
1854 int width;
1855
1856 if (!PyArg_ParseTuple(args, "i:center", &width))
1857 return NULL;
1858
1859 if (PyString_GET_SIZE(self) >= width) {
1860 Py_INCREF(self);
1861 return (PyObject*) self;
1862 }
1863
1864 marg = width - PyString_GET_SIZE(self);
1865 left = marg / 2 + (marg & width & 1);
1866
1867 return pad(self, left, marg - left, ' ');
1868}
1869
#if 0
/* Disabled: this method is compiled out and not listed as active in
   the method table. */
static char zfill__doc__[] =
    "S.zfill(width) -> string\n"
    "\n"
    "Pad a numeric string x with zeros on the left, to fill a field\n"
    "of the specified width. The string x is never truncated.";

/*
 * Implements S.zfill(width): left-pad with '0' up to `width`.  If the
 * original string starts with '+' or '-', the sign is moved in front of
 * the inserted zeros.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int zeros;
    PyObject *padded;
    char *text;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject *)self;
    }

    zeros = width - PyString_GET_SIZE(self);

    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* text[zeros] is the first character of the original string. */
    text = PyString_AS_STRING(padded);
    switch (text[zeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        text[0] = text[zeros];
        text[zeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1909
1910static char isspace__doc__[] =
1911"S.isspace() -> int\n\
1912\n\
1913Return 1 if there are only whitespace characters in S,\n\
19140 otherwise.";
1915
1916static PyObject*
1917string_isspace(PyStringObject *self, PyObject *args)
1918{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001919 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1920 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001921
1922 if (!PyArg_NoArgs(args))
1923 return NULL;
1924
1925 /* Shortcut for single character strings */
1926 if (PyString_GET_SIZE(self) == 1 &&
1927 isspace(*p))
1928 return PyInt_FromLong(1);
1929
1930 e = p + PyString_GET_SIZE(self);
1931 for (; p < e; p++) {
1932 if (!isspace(*p))
1933 return PyInt_FromLong(0);
1934 }
1935 return PyInt_FromLong(1);
1936}
1937
1938
1939static char isdigit__doc__[] =
1940"S.isdigit() -> int\n\
1941\n\
1942Return 1 if there are only digit characters in S,\n\
19430 otherwise.";
1944
1945static PyObject*
1946string_isdigit(PyStringObject *self, PyObject *args)
1947{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001948 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1949 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001950
1951 if (!PyArg_NoArgs(args))
1952 return NULL;
1953
1954 /* Shortcut for single character strings */
1955 if (PyString_GET_SIZE(self) == 1 &&
1956 isdigit(*p))
1957 return PyInt_FromLong(1);
1958
1959 e = p + PyString_GET_SIZE(self);
1960 for (; p < e; p++) {
1961 if (!isdigit(*p))
1962 return PyInt_FromLong(0);
1963 }
1964 return PyInt_FromLong(1);
1965}
1966
1967
1968static char islower__doc__[] =
1969"S.islower() -> int\n\
1970\n\
1971Return 1 if all cased characters in S are lowercase and there is\n\
1972at least one cased character in S, 0 otherwise.";
1973
1974static PyObject*
1975string_islower(PyStringObject *self, PyObject *args)
1976{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001977 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1978 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001979 int cased;
1980
1981 if (!PyArg_NoArgs(args))
1982 return NULL;
1983
1984 /* Shortcut for single character strings */
1985 if (PyString_GET_SIZE(self) == 1)
1986 return PyInt_FromLong(islower(*p) != 0);
1987
1988 e = p + PyString_GET_SIZE(self);
1989 cased = 0;
1990 for (; p < e; p++) {
1991 if (isupper(*p))
1992 return PyInt_FromLong(0);
1993 else if (!cased && islower(*p))
1994 cased = 1;
1995 }
1996 return PyInt_FromLong(cased);
1997}
1998
1999
2000static char isupper__doc__[] =
2001"S.isupper() -> int\n\
2002\n\
2003Return 1 if all cased characters in S are uppercase and there is\n\
2004at least one cased character in S, 0 otherwise.";
2005
2006static PyObject*
2007string_isupper(PyStringObject *self, PyObject *args)
2008{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002009 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2010 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011 int cased;
2012
2013 if (!PyArg_NoArgs(args))
2014 return NULL;
2015
2016 /* Shortcut for single character strings */
2017 if (PyString_GET_SIZE(self) == 1)
2018 return PyInt_FromLong(isupper(*p) != 0);
2019
2020 e = p + PyString_GET_SIZE(self);
2021 cased = 0;
2022 for (; p < e; p++) {
2023 if (islower(*p))
2024 return PyInt_FromLong(0);
2025 else if (!cased && isupper(*p))
2026 cased = 1;
2027 }
2028 return PyInt_FromLong(cased);
2029}
2030
2031
2032static char istitle__doc__[] =
2033"S.istitle() -> int\n\
2034\n\
2035Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2036may only follow uncased characters and lowercase characters only cased\n\
2037ones. Return 0 otherwise.";
2038
2039static PyObject*
2040string_istitle(PyStringObject *self, PyObject *args)
2041{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002042 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2043 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 int cased, previous_is_cased;
2045
2046 if (!PyArg_NoArgs(args))
2047 return NULL;
2048
2049 /* Shortcut for single character strings */
2050 if (PyString_GET_SIZE(self) == 1)
2051 return PyInt_FromLong(isupper(*p) != 0);
2052
2053 e = p + PyString_GET_SIZE(self);
2054 cased = 0;
2055 previous_is_cased = 0;
2056 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002057 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002058
2059 if (isupper(ch)) {
2060 if (previous_is_cased)
2061 return PyInt_FromLong(0);
2062 previous_is_cased = 1;
2063 cased = 1;
2064 }
2065 else if (islower(ch)) {
2066 if (!previous_is_cased)
2067 return PyInt_FromLong(0);
2068 previous_is_cased = 1;
2069 cased = 1;
2070 }
2071 else
2072 previous_is_cased = 0;
2073 }
2074 return PyInt_FromLong(cased);
2075}
2076
2077
2078static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002079"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080\n\
2081Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002082Line breaks are not included in the resulting list unless keepends\n\
2083is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002084
2085#define SPLIT_APPEND(data, left, right) \
2086 str = PyString_FromStringAndSize(data + left, right - left); \
2087 if (!str) \
2088 goto onError; \
2089 if (PyList_Append(list, str)) { \
2090 Py_DECREF(str); \
2091 goto onError; \
2092 } \
2093 else \
2094 Py_DECREF(str);
2095
2096static PyObject*
2097string_splitlines(PyStringObject *self, PyObject *args)
2098{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099 register int i;
2100 register int j;
2101 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002102 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002103 PyObject *list;
2104 PyObject *str;
2105 char *data;
2106
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002107 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002108 return NULL;
2109
2110 data = PyString_AS_STRING(self);
2111 len = PyString_GET_SIZE(self);
2112
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113 list = PyList_New(0);
2114 if (!list)
2115 goto onError;
2116
2117 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002118 int eol;
2119
Guido van Rossum4c08d552000-03-10 22:55:18 +00002120 /* Find a line and append it */
2121 while (i < len && data[i] != '\n' && data[i] != '\r')
2122 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002123
2124 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002125 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002126 if (i < len) {
2127 if (data[i] == '\r' && i + 1 < len &&
2128 data[i+1] == '\n')
2129 i += 2;
2130 else
2131 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002132 if (keepends)
2133 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002135 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002136 j = i;
2137 }
2138 if (j < len) {
2139 SPLIT_APPEND(data, j, len);
2140 }
2141
2142 return list;
2143
2144 onError:
2145 Py_DECREF(list);
2146 return NULL;
2147}
2148
2149#undef SPLIT_APPEND
2150
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151
2152static PyMethodDef
2153string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002154 /* Counterparts of the obsolete stropmodule functions; except
2155 string.maketrans(). */
2156 {"join", (PyCFunction)string_join, 1, join__doc__},
2157 {"split", (PyCFunction)string_split, 1, split__doc__},
2158 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2159 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2160 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2161 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2162 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2163 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2164 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2166 {"count", (PyCFunction)string_count, 1, count__doc__},
2167 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2168 {"find", (PyCFunction)string_find, 1, find__doc__},
2169 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2172 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2173 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2174 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2176 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2177 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002178 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2179 {"title", (PyCFunction)string_title, 1, title__doc__},
2180 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2181 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2182 {"center", (PyCFunction)string_center, 1, center__doc__},
2183 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2184 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2185#if 0
2186 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2187#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188 {NULL, NULL} /* sentinel */
2189};
2190
2191static PyObject *
2192string_getattr(s, name)
2193 PyStringObject *s;
2194 char *name;
2195{
2196 return Py_FindMethod(string_methods, (PyObject*)s, name);
2197}
2198
2199
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002200PyTypeObject PyString_Type = {
2201 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002202 0,
2203 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002204 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002205 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002206 (destructor)string_dealloc, /*tp_dealloc*/
2207 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002209 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002210 (cmpfunc)string_compare, /*tp_compare*/
2211 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002212 0, /*tp_as_number*/
2213 &string_as_sequence, /*tp_as_sequence*/
2214 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002215 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002216 0, /*tp_call*/
2217 0, /*tp_str*/
2218 0, /*tp_getattro*/
2219 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002220 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002221 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002222 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002223};
2224
2225void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002226PyString_Concat(pv, w)
2227 register PyObject **pv;
2228 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002229{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002230 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002231 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002232 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002233 if (w == NULL || !PyString_Check(*pv)) {
2234 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002235 *pv = NULL;
2236 return;
2237 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002238 v = string_concat((PyStringObject *) *pv, w);
2239 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002240 *pv = v;
2241}
2242
Guido van Rossum013142a1994-08-30 08:19:36 +00002243void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002244PyString_ConcatAndDel(pv, w)
2245 register PyObject **pv;
2246 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002247{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002248 PyString_Concat(pv, w);
2249 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002250}
2251
2252
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002253/* The following function breaks the notion that strings are immutable:
2254 it changes the size of a string. We get away with this only if there
2255 is only one module referencing the object. You can also think of it
2256 as creating a new string object and destroying the old one, only
2257 more efficiently. In any case, don't use this if the string may
2258 already be known to some other part of the code... */
2259
2260int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002261_PyString_Resize(pv, newsize)
2262 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002263 int newsize;
2264{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002265 register PyObject *v;
2266 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002267 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002268 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002269 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002270 Py_DECREF(v);
2271 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002272 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002273 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002274 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002275#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002276 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002277#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002278 _Py_ForgetReference(v);
2279 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002280 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002281 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002282 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002283 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002284 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002285 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002286 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002287 _Py_NewReference(*pv);
2288 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002289 sv->ob_size = newsize;
2290 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002291 return 0;
2292}
Guido van Rossume5372401993-03-16 12:15:04 +00002293
2294/* Helpers for formatstring */
2295
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002296static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002297getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002298 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002299 int arglen;
2300 int *p_argidx;
2301{
2302 int argidx = *p_argidx;
2303 if (argidx < arglen) {
2304 (*p_argidx)++;
2305 if (arglen < 0)
2306 return args;
2307 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002308 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002309 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002310 PyErr_SetString(PyExc_TypeError,
2311 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002312 return NULL;
2313}
2314
/* Bit flags collected while parsing a '%' format specifier. */
#define F_LJUST 0x01    /* '-' flag */
#define F_SIGN  0x02    /* '+' flag */
#define F_BLANK 0x04    /* ' ' flag */
#define F_ALT   0x08    /* '#' flag */
#define F_ZERO  0x10    /* '0' flag */
2320
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002321static int
2322formatfloat(buf, flags, prec, type, v)
2323 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002324 int flags;
2325 int prec;
2326 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002327 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002328{
2329 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002330 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002331 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002332 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002333 if (prec < 0)
2334 prec = 6;
2335 if (prec > 50)
2336 prec = 50; /* Arbitrary limitation */
2337 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2338 type = 'g';
2339 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2340 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002341 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002342}
2343
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002344static int
2345formatint(buf, flags, prec, type, v)
2346 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002347 int flags;
2348 int prec;
2349 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002350 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002351{
2352 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002353 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002354 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002355 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002356 if (prec < 0)
2357 prec = 1;
2358 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2359 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002360 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002361}
2362
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002363static int
2364formatchar(buf, v)
2365 char *buf;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002366 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002367{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002368 if (PyString_Check(v)) {
2369 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002370 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002371 }
2372 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002373 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002374 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002375 }
2376 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002377 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002378}
2379
Guido van Rossum013142a1994-08-30 08:19:36 +00002380
Guido van Rossume5372401993-03-16 12:15:04 +00002381/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
2382
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002383PyObject *
2384PyString_Format(format, args)
2385 PyObject *format;
2386 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002387{
2388 char *fmt, *res;
2389 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002390 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002391 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002392 PyObject *dict = NULL;
2393 if (format == NULL || !PyString_Check(format) || args == NULL) {
2394 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002395 return NULL;
2396 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002397 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002398 fmt = PyString_AsString(format);
2399 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002400 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002401 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002402 if (result == NULL)
2403 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002404 res = PyString_AsString(result);
2405 if (PyTuple_Check(args)) {
2406 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002407 argidx = 0;
2408 }
2409 else {
2410 arglen = -1;
2411 argidx = -2;
2412 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002413 if (args->ob_type->tp_as_mapping)
2414 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002415 while (--fmtcnt >= 0) {
2416 if (*fmt != '%') {
2417 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002418 rescnt = fmtcnt + 100;
2419 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002420 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002421 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002422 res = PyString_AsString(result)
2423 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002424 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002425 }
2426 *res++ = *fmt++;
2427 }
2428 else {
2429 /* Got a format specifier */
2430 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002431 int width = -1;
2432 int prec = -1;
2433 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002434 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002435 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002436 PyObject *v = NULL;
2437 PyObject *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +00002438 char *buf;
2439 int sign;
2440 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002441 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002442 char *fmt_start = fmt;
2443
Guido van Rossumda9c2711996-12-05 21:58:58 +00002444 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002445 if (*fmt == '(') {
2446 char *keystart;
2447 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002448 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002449 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002450
2451 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002452 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002453 "format requires a mapping");
2454 goto error;
2455 }
2456 ++fmt;
2457 --fmtcnt;
2458 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002459 /* Skip over balanced parentheses */
2460 while (pcount > 0 && --fmtcnt >= 0) {
2461 if (*fmt == ')')
2462 --pcount;
2463 else if (*fmt == '(')
2464 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002465 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002466 }
2467 keylen = fmt - keystart - 1;
2468 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002469 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002470 "incomplete format key");
2471 goto error;
2472 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002473 key = PyString_FromStringAndSize(keystart,
2474 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002475 if (key == NULL)
2476 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002477 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002478 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002479 args_owned = 0;
2480 }
2481 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002482 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002483 if (args == NULL) {
2484 goto error;
2485 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002486 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002487 arglen = -1;
2488 argidx = -2;
2489 }
Guido van Rossume5372401993-03-16 12:15:04 +00002490 while (--fmtcnt >= 0) {
2491 switch (c = *fmt++) {
2492 case '-': flags |= F_LJUST; continue;
2493 case '+': flags |= F_SIGN; continue;
2494 case ' ': flags |= F_BLANK; continue;
2495 case '#': flags |= F_ALT; continue;
2496 case '0': flags |= F_ZERO; continue;
2497 }
2498 break;
2499 }
2500 if (c == '*') {
2501 v = getnextarg(args, arglen, &argidx);
2502 if (v == NULL)
2503 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002504 if (!PyInt_Check(v)) {
2505 PyErr_SetString(PyExc_TypeError,
2506 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002507 goto error;
2508 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002509 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002510 if (width < 0) {
2511 flags |= F_LJUST;
2512 width = -width;
2513 }
Guido van Rossume5372401993-03-16 12:15:04 +00002514 if (--fmtcnt >= 0)
2515 c = *fmt++;
2516 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002517 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002518 width = c - '0';
2519 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002520 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002521 if (!isdigit(c))
2522 break;
2523 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002524 PyErr_SetString(
2525 PyExc_ValueError,
2526 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002527 goto error;
2528 }
2529 width = width*10 + (c - '0');
2530 }
2531 }
2532 if (c == '.') {
2533 prec = 0;
2534 if (--fmtcnt >= 0)
2535 c = *fmt++;
2536 if (c == '*') {
2537 v = getnextarg(args, arglen, &argidx);
2538 if (v == NULL)
2539 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002540 if (!PyInt_Check(v)) {
2541 PyErr_SetString(
2542 PyExc_TypeError,
2543 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002544 goto error;
2545 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002546 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002547 if (prec < 0)
2548 prec = 0;
2549 if (--fmtcnt >= 0)
2550 c = *fmt++;
2551 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002552 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002553 prec = c - '0';
2554 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002555 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002556 if (!isdigit(c))
2557 break;
2558 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002559 PyErr_SetString(
2560 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002561 "prec too big");
2562 goto error;
2563 }
2564 prec = prec*10 + (c - '0');
2565 }
2566 }
2567 } /* prec */
2568 if (fmtcnt >= 0) {
2569 if (c == 'h' || c == 'l' || c == 'L') {
2570 size = c;
2571 if (--fmtcnt >= 0)
2572 c = *fmt++;
2573 }
2574 }
2575 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002576 PyErr_SetString(PyExc_ValueError,
2577 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002578 goto error;
2579 }
2580 if (c != '%') {
2581 v = getnextarg(args, arglen, &argidx);
2582 if (v == NULL)
2583 goto error;
2584 }
2585 sign = 0;
2586 fill = ' ';
2587 switch (c) {
2588 case '%':
2589 buf = "%";
2590 len = 1;
2591 break;
2592 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002593 case 'r':
2594 if (PyUnicode_Check(v)) {
2595 fmt = fmt_start;
2596 goto unicode;
2597 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002598 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002599 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002600 else
2601 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002602 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002603 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002604 if (!PyString_Check(temp)) {
2605 PyErr_SetString(PyExc_TypeError,
2606 "%s argument has non-string str()");
2607 goto error;
2608 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002609 buf = PyString_AsString(temp);
2610 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002611 if (prec >= 0 && len > prec)
2612 len = prec;
2613 break;
2614 case 'i':
2615 case 'd':
2616 case 'u':
2617 case 'o':
2618 case 'x':
2619 case 'X':
2620 if (c == 'i')
2621 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002622 buf = tmpbuf;
2623 len = formatint(buf, flags, prec, c, v);
2624 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002625 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002626 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002627 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002628 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002629 if ((flags&F_ALT) &&
2630 (c == 'x' || c == 'X') &&
2631 buf[0] == '0' && buf[1] == c) {
2632 *res++ = *buf++;
2633 *res++ = *buf++;
2634 rescnt -= 2;
2635 len -= 2;
2636 width -= 2;
2637 if (width < 0)
2638 width = 0;
2639 }
2640 }
Guido van Rossume5372401993-03-16 12:15:04 +00002641 break;
2642 case 'e':
2643 case 'E':
2644 case 'f':
2645 case 'g':
2646 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002647 buf = tmpbuf;
2648 len = formatfloat(buf, flags, prec, c, v);
2649 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002650 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002651 sign = 1;
2652 if (flags&F_ZERO)
2653 fill = '0';
2654 break;
2655 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002656 buf = tmpbuf;
2657 len = formatchar(buf, v);
2658 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002659 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002660 break;
2661 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002662 PyErr_Format(PyExc_ValueError,
2663 "unsupported format character '%c' (0x%x)",
2664 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002665 goto error;
2666 }
2667 if (sign) {
2668 if (*buf == '-' || *buf == '+') {
2669 sign = *buf++;
2670 len--;
2671 }
2672 else if (flags & F_SIGN)
2673 sign = '+';
2674 else if (flags & F_BLANK)
2675 sign = ' ';
2676 else
2677 sign = '\0';
2678 }
2679 if (width < len)
2680 width = len;
2681 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002682 reslen -= rescnt;
2683 rescnt = width + fmtcnt + 100;
2684 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002685 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002686 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002687 res = PyString_AsString(result)
2688 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002689 }
2690 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002691 if (fill != ' ')
2692 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002693 rescnt--;
2694 if (width > len)
2695 width--;
2696 }
2697 if (width > len && !(flags&F_LJUST)) {
2698 do {
2699 --rescnt;
2700 *res++ = fill;
2701 } while (--width > len);
2702 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002703 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002704 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002705 memcpy(res, buf, len);
2706 res += len;
2707 rescnt -= len;
2708 while (--width >= len) {
2709 --rescnt;
2710 *res++ = ' ';
2711 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002712 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002713 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002714 "not all arguments converted");
2715 goto error;
2716 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002717 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002718 } /* '%' */
2719 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002720 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002721 PyErr_SetString(PyExc_TypeError,
2722 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002723 goto error;
2724 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002725 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002726 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002727 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002728 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002729 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002730
2731 unicode:
2732 if (args_owned) {
2733 Py_DECREF(args);
2734 args_owned = 0;
2735 }
2736 /* Fiddle args right (remove the first argidx-1 arguments) */
2737 --argidx;
2738 if (PyTuple_Check(orig_args) && argidx > 0) {
2739 PyObject *v;
2740 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2741 v = PyTuple_New(n);
2742 if (v == NULL)
2743 goto error;
2744 while (--n >= 0) {
2745 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2746 Py_INCREF(w);
2747 PyTuple_SET_ITEM(v, n, w);
2748 }
2749 args = v;
2750 } else {
2751 Py_INCREF(orig_args);
2752 args = orig_args;
2753 }
2754 /* Paste rest of format string to what we have of the result
2755 string; we reuse result for this */
2756 rescnt = res - PyString_AS_STRING(result);
2757 fmtcnt = PyString_GET_SIZE(format) - \
2758 (fmt - PyString_AS_STRING(format));
2759 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2760 Py_DECREF(args);
2761 goto error;
2762 }
2763 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2764 format = result;
2765 /* Let Unicode do its magic */
2766 result = PyUnicode_Format(format, args);
2767 Py_DECREF(format);
2768 Py_DECREF(args);
2769 return result;
2770
Guido van Rossume5372401993-03-16 12:15:04 +00002771 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002772 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002773 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002774 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002775 }
Guido van Rossume5372401993-03-16 12:15:04 +00002776 return NULL;
2777}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002778
2779
2780#ifdef INTERN_STRINGS
2781
2782static PyObject *interned;
2783
2784void
2785PyString_InternInPlace(p)
2786 PyObject **p;
2787{
2788 register PyStringObject *s = (PyStringObject *)(*p);
2789 PyObject *t;
2790 if (s == NULL || !PyString_Check(s))
2791 Py_FatalError("PyString_InternInPlace: strings only please!");
2792 if ((t = s->ob_sinterned) != NULL) {
2793 if (t == (PyObject *)s)
2794 return;
2795 Py_INCREF(t);
2796 *p = t;
2797 Py_DECREF(s);
2798 return;
2799 }
2800 if (interned == NULL) {
2801 interned = PyDict_New();
2802 if (interned == NULL)
2803 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002804 }
2805 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2806 Py_INCREF(t);
2807 *p = s->ob_sinterned = t;
2808 Py_DECREF(s);
2809 return;
2810 }
2811 t = (PyObject *)s;
2812 if (PyDict_SetItem(interned, t, t) == 0) {
2813 s->ob_sinterned = t;
2814 return;
2815 }
2816 PyErr_Clear();
2817}
2818
2819
2820PyObject *
2821PyString_InternFromString(cp)
2822 const char *cp;
2823{
2824 PyObject *s = PyString_FromString(cp);
2825 if (s == NULL)
2826 return NULL;
2827 PyString_InternInPlace(&s);
2828 return s;
2829}
2830
2831#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002832
2833void
2834PyString_Fini()
2835{
2836 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002837 for (i = 0; i < UCHAR_MAX + 1; i++) {
2838 Py_XDECREF(characters[i]);
2839 characters[i] = NULL;
2840 }
2841#ifndef DONT_SHARE_SHORT_STRINGS
2842 Py_XDECREF(nullstring);
2843 nullstring = NULL;
2844#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002845#ifdef INTERN_STRINGS
2846 if (interned) {
2847 int pos, changed;
2848 PyObject *key, *value;
2849 do {
2850 changed = 0;
2851 pos = 0;
2852 while (PyDict_Next(interned, &pos, &key, &value)) {
2853 if (key->ob_refcnt == 2 && key == value) {
2854 PyDict_DelItem(interned, key);
2855 changed = 1;
2856 }
2857 }
2858 } while (changed);
2859 }
2860#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002861}