blob: 264ed9ac31d1544b624f243bb3e1b738988b37f1 [file] [log] [blame]
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum71160aa1997-06-03 18:03:18 +000036#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000037#include <ctype.h>
38
#ifdef COUNT_ALLOCS
/* Allocation statistics: number of requests answered with the shared
   empty string and with a shared one-character string, respectively. */
int null_strings;
int one_strings;
#endif
42
Guido van Rossum03093a21994-09-28 15:51:32 +000043#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000044#include <limits.h>
45#else
46#ifndef UCHAR_MAX
47#define UCHAR_MAX 255
48#endif
49#endif
50
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000052#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055
/*
   PyString_FromStringAndSize() and PyString_FromString() try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string is known.  For
   PyString_FromString() this is always the case, for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071PyObject *
72PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000073 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074 int size;
75{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000077#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
92 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 op = (PyStringObject *)
96 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000097 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return PyErr_NoMemory();
99 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000100 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101#ifdef CACHE_HASH
102 op->ob_shash = -1;
103#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000104#ifdef INTERN_STRINGS
105 op->ob_sinterned = NULL;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 _Py_NewReference(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000108 if (str != NULL)
109 memcpy(op->ob_sval, str, size);
110 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0) {
113 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 } else if (size == 1 && str != NULL) {
116 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121}
122
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123PyObject *
124PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000125 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126{
127 register unsigned int size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000129#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 if (size == 0 && (op = nullstring) != NULL) {
131#ifdef COUNT_ALLOCS
132 null_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138#ifdef COUNT_ALLOCS
139 one_strings++;
140#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000144#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000145 op = (PyStringObject *)
146 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000147 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return PyErr_NoMemory();
149 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000150 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151#ifdef CACHE_HASH
152 op->ob_shash = -1;
153#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000154#ifdef INTERN_STRINGS
155 op->ob_sinterned = NULL;
156#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000157 _Py_NewReference(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000158 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000159#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 if (size == 0) {
161 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000162 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000163 } else if (size == 1) {
164 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000165 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000166 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000167#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000168 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169}
170
Guido van Rossum234f9421993-06-17 12:35:49 +0000171static void
Guido van Rossume5372401993-03-16 12:15:04 +0000172string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000173 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000174{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000175 PyMem_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000176}
177
Guido van Rossumd7047b31995-01-02 19:07:15 +0000178int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000179PyString_Size(op)
180 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000182 if (!PyString_Check(op)) {
183 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184 return -1;
185 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000186 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000187}
188
189/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000190PyString_AsString(op)
191 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000193 if (!PyString_Check(op)) {
194 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 return NULL;
196 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000197 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198}
199
200/* Methods */
201
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000202static int
Guido van Rossume5372401993-03-16 12:15:04 +0000203string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000204 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 FILE *fp;
206 int flags;
207{
208 int i;
209 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000210 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000212 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000214 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216
217 /* figure out which quote to use; single is prefered */
218 quote = '\'';
219 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
220 quote = '"';
221
222 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 for (i = 0; i < op->ob_size; i++) {
224 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 fprintf(fp, "\\%c", c);
227 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000228 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000232 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000233 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234}
235
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000236static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000237string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239{
240 /* XXX overflow? */
241 int newsize = 2 + 4 * op->ob_size * sizeof(char);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000242 PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000244 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000245 }
246 else {
247 register int i;
248 register char c;
249 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000250 int quote;
251
252 /* figure out which quote to use; single is prefered */
253 quote = '\'';
254 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
255 quote = '"';
256
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000257 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 for (i = 0; i < op->ob_size; i++) {
260 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 *p++ = '\\', *p++ = c;
263 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000264 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 while (*p != '\0')
266 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 }
268 else
269 *p++ = c;
270 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000271 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000273 _PyString_Resize(
274 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000275 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277}
278
279static int
Guido van Rossume5372401993-03-16 12:15:04 +0000280string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282{
283 return a->ob_size;
284}
285
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000286static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000287string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000288 register PyStringObject *a;
289 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290{
291 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 register PyStringObject *op;
293 if (!PyString_Check(bb)) {
294 PyErr_BadArgument();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000295 return NULL;
296 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000297#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 /* Optimize cases with empty left or right operand */
299 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000300 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000301 return bb;
302 }
303 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000304 Py_INCREF(a);
305 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000306 }
307 size = a->ob_size + b->ob_size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000308 op = (PyStringObject *)
309 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000310 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000311 return PyErr_NoMemory();
312 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000313 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000314#ifdef CACHE_HASH
315 op->ob_shash = -1;
316#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000317#ifdef INTERN_STRINGS
318 op->ob_sinterned = NULL;
319#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000320 _Py_NewReference(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000321 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
322 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
323 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000324 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000325#undef b
326}
327
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000328static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000329string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000330 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000331 register int n;
332{
333 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000334 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000335 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 if (n < 0)
337 n = 0;
338 size = a->ob_size * n;
339 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000340 Py_INCREF(a);
341 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000342 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000343 op = (PyStringObject *)
344 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000345 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000346 return PyErr_NoMemory();
347 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000348 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000349#ifdef CACHE_HASH
350 op->ob_shash = -1;
351#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000352#ifdef INTERN_STRINGS
353 op->ob_sinterned = NULL;
354#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000355 _Py_NewReference(op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000356 for (i = 0; i < size; i += a->ob_size)
357 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
358 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000359 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000360}
361
362/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
363
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000364static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000365string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000367 register int i, j; /* May be negative! */
368{
369 if (i < 0)
370 i = 0;
371 if (j < 0)
372 j = 0; /* Avoid signed/unsigned bug in next line */
373 if (j > a->ob_size)
374 j = a->ob_size;
375 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000376 Py_INCREF(a);
377 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000378 }
379 if (j < i)
380 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000381 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382}
383
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000384static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000385string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000386 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000387 register int i;
388{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000389 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000390 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000392 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000393 return NULL;
394 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000395 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000396 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000397#ifdef COUNT_ALLOCS
398 if (v != NULL)
399 one_strings++;
400#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000401 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000402 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000403 if (v == NULL)
404 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000405 characters[c] = (PyStringObject *) v;
406 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000407 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000408 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000409 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410}
411
412static int
Guido van Rossume5372401993-03-16 12:15:04 +0000413string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000414 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000415{
Guido van Rossum253919f1991-02-13 23:18:39 +0000416 int len_a = a->ob_size, len_b = b->ob_size;
417 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000418 int cmp;
419 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000420 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000421 if (cmp == 0)
422 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
423 if (cmp != 0)
424 return cmp;
425 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000426 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427}
428
Guido van Rossum9bfef441993-03-29 10:43:31 +0000429static long
430string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000431 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000432{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000433 register int len;
434 register unsigned char *p;
435 register long x;
436
437#ifdef CACHE_HASH
438 if (a->ob_shash != -1)
439 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000440#ifdef INTERN_STRINGS
441 if (a->ob_sinterned != NULL)
442 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000443 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000444#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000445#endif
446 len = a->ob_size;
447 p = (unsigned char *) a->ob_sval;
448 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000449 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000450 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000451 x ^= a->ob_size;
452 if (x == -1)
453 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000454#ifdef CACHE_HASH
455 a->ob_shash = x;
456#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000457 return x;
458}
459
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000460static int
461string_buffer_getreadbuf(self, index, ptr)
462 PyStringObject *self;
463 int index;
464 const void **ptr;
465{
466 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000467 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000468 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000469 return -1;
470 }
471 *ptr = (void *)self->ob_sval;
472 return self->ob_size;
473}
474
475static int
476string_buffer_getwritebuf(self, index, ptr)
477 PyStringObject *self;
478 int index;
479 const void **ptr;
480{
Guido van Rossum045e6881997-09-08 18:30:11 +0000481 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000482 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000483 return -1;
484}
485
486static int
487string_buffer_getsegcount(self, lenp)
488 PyStringObject *self;
489 int *lenp;
490{
491 if ( lenp )
492 *lenp = self->ob_size;
493 return 1;
494}
495
Guido van Rossum1db70701998-10-08 02:18:52 +0000496static int
497string_buffer_getcharbuf(self, index, ptr)
498 PyStringObject *self;
499 int index;
500 const char **ptr;
501{
502 if ( index != 0 ) {
503 PyErr_SetString(PyExc_SystemError,
504 "accessing non-existent string segment");
505 return -1;
506 }
507 *ptr = self->ob_sval;
508 return self->ob_size;
509}
510
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000511static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000512 (inquiry)string_length, /*sq_length*/
513 (binaryfunc)string_concat, /*sq_concat*/
514 (intargfunc)string_repeat, /*sq_repeat*/
515 (intargfunc)string_item, /*sq_item*/
516 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000517 0, /*sq_ass_item*/
518 0, /*sq_ass_slice*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519};
520
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000521static PyBufferProcs string_as_buffer = {
522 (getreadbufferproc)string_buffer_getreadbuf,
523 (getwritebufferproc)string_buffer_getwritebuf,
524 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000525 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000526};
527
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000528
529
/* Strip-mode selectors.
   NOTE(review): their users are outside this part of the file;
   presumably they choose which end(s) of a string get stripped --
   confirm against the strip functions. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
533
534
535static PyObject *
536split_whitespace(s, len, maxsplit)
537 char *s;
538 int len;
539 int maxsplit;
540{
541 int i = 0, j, err;
542 int countsplit = 0;
543 PyObject* item;
544 PyObject *list = PyList_New(0);
545
546 if (list == NULL)
547 return NULL;
548
549 while (i < len) {
550 while (i < len && isspace(Py_CHARMASK(s[i]))) {
551 i = i+1;
552 }
553 j = i;
554 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
555 i = i+1;
556 }
557 if (j < i) {
558 item = PyString_FromStringAndSize(s+j, (int)(i-j));
559 if (item == NULL)
560 goto finally;
561
562 err = PyList_Append(list, item);
563 Py_DECREF(item);
564 if (err < 0)
565 goto finally;
566
567 countsplit++;
568 while (i < len && isspace(Py_CHARMASK(s[i]))) {
569 i = i+1;
570 }
571 if (maxsplit && (countsplit >= maxsplit) && i < len) {
572 item = PyString_FromStringAndSize(
573 s+i, (int)(len - i));
574 if (item == NULL)
575 goto finally;
576
577 err = PyList_Append(list, item);
578 Py_DECREF(item);
579 if (err < 0)
580 goto finally;
581
582 i = len;
583 }
584 }
585 }
586 return list;
587 finally:
588 Py_DECREF(list);
589 return NULL;
590}
591
592
593static char split__doc__[] =
594"S.split([sep [,maxsplit]]) -> list of strings\n\
595\n\
596Return a list of the words in the string S, using sep as the\n\
597delimiter string. If maxsplit is nonzero, splits into at most\n\
598maxsplit words If sep is not specified, any whitespace string\n\
599is a separator. Maxsplit defaults to 0.";
600
601static PyObject *
602string_split(self, args)
603 PyStringObject *self;
604 PyObject *args;
605{
606 int len = PyString_GET_SIZE(self), n, i, j, err;
607 int splitcount, maxsplit;
608 char *s = PyString_AS_STRING(self), *sub;
609 PyObject *list, *item;
610
611 sub = NULL;
612 n = 0;
613 splitcount = 0;
614 maxsplit = 0;
615 if (!PyArg_ParseTuple(args, "|z#i", &sub, &n, &maxsplit))
616 return NULL;
617 if (sub == NULL)
618 return split_whitespace(s, len, maxsplit);
619 if (n == 0) {
620 PyErr_SetString(PyExc_ValueError, "empty separator");
621 return NULL;
622 }
623
624 list = PyList_New(0);
625 if (list == NULL)
626 return NULL;
627
628 i = j = 0;
629 while (i+n <= len) {
630 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
631 item = PyString_FromStringAndSize(s+j, (int)(i-j));
632 if (item == NULL)
633 goto fail;
634 err = PyList_Append(list, item);
635 Py_DECREF(item);
636 if (err < 0)
637 goto fail;
638 i = j = i + n;
639 splitcount++;
640 if (maxsplit && (splitcount >= maxsplit))
641 break;
642 }
643 else
644 i++;
645 }
646 item = PyString_FromStringAndSize(s+j, (int)(len-j));
647 if (item == NULL)
648 goto fail;
649 err = PyList_Append(list, item);
650 Py_DECREF(item);
651 if (err < 0)
652 goto fail;
653
654 return list;
655
656 fail:
657 Py_DECREF(list);
658 return NULL;
659}
660
661
662static char join__doc__[] =
663"S.join(sequence) -> string\n\
664\n\
665Return a string which is the concatenation of the string representation\n\
666of every element in the sequence. The separator between elements is S.";
667
668static PyObject *
669string_join(self, args)
670 PyStringObject *self;
671 PyObject *args;
672{
673 char *sep = PyString_AS_STRING(self);
674 int seplen = PyString_GET_SIZE(self);
675 PyObject *res = NULL;
676 int reslen = 0;
677 char *p;
678 int seqlen = 0;
679 int sz = 100;
680 int i, slen;
681 PyObject *seq;
682
683 if (!PyArg_ParseTuple(args, "O", &seq))
684 return NULL;
685
686 seqlen = PySequence_Length(seq);
687 if (seqlen < 0 && PyErr_Occurred())
688 return NULL;
689
690 if (seqlen == 1) {
691 /* Optimization if there's only one item */
692 PyObject *item = PySequence_GetItem(seq, 0);
693 PyObject *stritem = PyObject_Str(item);
694 Py_DECREF(item);
695 return stritem;
696 }
697 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
698 return NULL;
699 p = PyString_AsString(res);
700
701 /* optimize for lists. all others (tuples and arbitrary sequences)
702 * just use the abstract interface.
703 */
704 if (PyList_Check(seq)) {
705 for (i = 0; i < seqlen; i++) {
706 PyObject *item = PyList_GET_ITEM(seq, i);
707 PyObject *sitem = PyObject_Str(item);
708 if (!sitem)
709 goto finally;
710 slen = PyString_GET_SIZE(sitem);
711 while (reslen + slen + seplen >= sz) {
712 if (_PyString_Resize(&res, sz*2))
713 goto finally;
714 sz *= 2;
715 p = PyString_AsString(res) + reslen;
716 }
717 if (i > 0) {
718 memcpy(p, sep, seplen);
719 p += seplen;
720 reslen += seplen;
721 }
722 memcpy(p, PyString_AS_STRING(sitem), slen);
723 p += slen;
724 reslen += slen;
725 }
726 }
727 else {
728 for (i = 0; i < seqlen; i++) {
729 PyObject *item = PySequence_GetItem(seq, i);
730 PyObject *sitem;
731 if (!item || !(sitem = PyObject_Str(item))) {
732 Py_XDECREF(item);
733 goto finally;
734 }
735 slen = PyString_GET_SIZE(sitem);
736 while (reslen + slen + seplen >= sz) {
737 if (_PyString_Resize(&res, sz*2))
738 goto finally;
739 sz *= 2;
740 p = PyString_AsString(res) + reslen;
741 }
742 if (i > 0) {
743 memcpy(p, sep, seplen);
744 p += seplen;
745 reslen += seplen;
746 }
747 memcpy(p, PyString_AS_STRING(sitem), slen);
748 p += slen;
749 reslen += slen;
750 }
751 }
752 if (_PyString_Resize(&res, reslen))
753 goto finally;
754 return res;
755
756 finally:
757 Py_DECREF(res);
758 return NULL;
759}
760
761
762
763static long
764string_find_internal(self, args)
765 PyStringObject *self;
766 PyObject *args;
767{
768 char *s = PyString_AS_STRING(self), *sub;
769 int len = PyString_GET_SIZE(self);
770 int n, i = 0, last = INT_MAX;
771
772 if (!PyArg_ParseTuple(args, "t#|ii", &sub, &n, &i, &last))
773 return -2;
774
775 if (last > len)
776 last = len;
777 if (last < 0)
778 last += len;
779 if (last < 0)
780 last = 0;
781 if (i < 0)
782 i += len;
783 if (i < 0)
784 i = 0;
785
786 if (n == 0 && i <= last)
787 return (long)i;
788
789 last -= n;
790 for (; i <= last; ++i)
791 if (s[i] == sub[0] &&
792 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
793 return (long)i;
794
795 return -1;
796}
797
798
799static char find__doc__[] =
800"S.find(sub [,start [,end]]) -> int\n\
801\n\
802Return the lowest index in S where substring sub is found,\n\
803such that sub is contained within s[start,end]. Optional\n\
804arguments start and end are interpreted as in slice notation.\n\
805\n\
806Return -1 on failure.";
807
808static PyObject *
809string_find(self, args)
810 PyStringObject *self;
811 PyObject *args;
812{
813 long result = string_find_internal(self, args);
814 if (result == -2)
815 return NULL;
816 return PyInt_FromLong(result);
817}
818
819
820static char index__doc__[] =
821"S.index(sub [,start [,end]]) -> int\n\
822\n\
823Like S.find() but raise ValueError when the substring is not found.";
824
825static PyObject *
826string_index(self, args)
827 PyStringObject *self;
828 PyObject *args;
829{
830 long result = string_find_internal(self, args);
831 if (result == -2)
832 return NULL;
833 if (result == -1) {
834 PyErr_SetString(PyExc_ValueError,
835 "substring not found in string.index");
836 return NULL;
837 }
838 return PyInt_FromLong(result);
839}
840
841
842static long
843string_rfind_internal(self, args)
844 PyStringObject *self;
845 PyObject *args;
846{
847 char *s = PyString_AS_STRING(self), *sub;
848 int len = PyString_GET_SIZE(self), n, j;
849 int i = 0, last = INT_MAX;
850
851 if (!PyArg_ParseTuple(args, "t#|ii", &sub, &n, &i, &last))
852 return -2;
853
854 if (last > len)
855 last = len;
856 if (last < 0)
857 last += len;
858 if (last < 0)
859 last = 0;
860 if (i < 0)
861 i += len;
862 if (i < 0)
863 i = 0;
864
865 if (n == 0 && i <= last)
866 return (long)last;
867
868 for (j = last-n; j >= i; --j)
869 if (s[j] == sub[0] &&
870 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
871 return (long)j;
872
873 return -1;
874}
875
876
/* Docstring for S.rfind().  The searched range is written in slice
   notation, S[start:end], matching count__doc__. */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
885
886static PyObject *
887string_rfind(self, args)
888 PyStringObject *self;
889 PyObject *args;
890{
891 long result = string_rfind_internal(self, args);
892 if (result == -2)
893 return NULL;
894 return PyInt_FromLong(result);
895}
896
897
898static char rindex__doc__[] =
899"S.rindex(sub [,start [,end]]) -> int\n\
900\n\
901Like S.rfind() but raise ValueError when the substring is not found.";
902
903static PyObject *
904string_rindex(self, args)
905 PyStringObject *self;
906 PyObject *args;
907{
908 long result = string_rfind_internal(self, args);
909 if (result == -2)
910 return NULL;
911 if (result == -1) {
912 PyErr_SetString(PyExc_ValueError,
913 "substring not found in string.rindex");
914 return NULL;
915 }
916 return PyInt_FromLong(result);
917}
918
919
920static PyObject *
921do_strip(self, args, striptype)
922 PyStringObject *self;
923 PyObject *args;
924 int striptype;
925{
926 char *s = PyString_AS_STRING(self);
927 int len = PyString_GET_SIZE(self), i, j;
928 PyObject *scobj = NULL;
929 int count = -1;
930
931 if (!PyArg_ParseTuple(args, "|Oi", scobj, count))
932 return NULL;
933
934 i = 0;
935 if (striptype != RIGHTSTRIP) {
936 while (i < len && isspace(Py_CHARMASK(s[i]))) {
937 i++;
938 }
939 }
940
941 j = len;
942 if (striptype != LEFTSTRIP) {
943 do {
944 j--;
945 } while (j >= i && isspace(Py_CHARMASK(s[j])));
946 j++;
947 }
948
949 if (i == 0 && j == len) {
950 Py_INCREF(self);
951 return (PyObject*)self;
952 }
953 else
954 return PyString_FromStringAndSize(s+i, j-i);
955}
956
957
958static char strip__doc__[] =
959"S.strip() -> string\n\
960\n\
961Return a copy of the string S with leading and trailing\n\
962whitespace removed.";
963
964static PyObject *
965string_strip(self, args)
966 PyStringObject *self;
967 PyObject *args;
968{
969 return do_strip(self, args, BOTHSTRIP);
970}
971
972
973static char lstrip__doc__[] =
974"S.lstrip() -> string\n\
975\n\
976Return a copy of the string S with leading whitespace removed.";
977
978static PyObject *
979string_lstrip(self, args)
980 PyStringObject *self;
981 PyObject *args;
982{
983 return do_strip(self, args, LEFTSTRIP);
984}
985
986
987static char rstrip__doc__[] =
988"S.rstrip() -> string\n\
989\n\
990Return a copy of the string S with trailing whitespace removed.";
991
992static PyObject *
993string_rstrip(self, args)
994 PyStringObject *self;
995 PyObject *args;
996{
997 return do_strip(self, args, RIGHTSTRIP);
998}
999
1000
1001static char lower__doc__[] =
1002"S.lower() -> string\n\
1003\n\
1004Return a copy of the string S converted to lowercase.";
1005
1006static PyObject *
1007string_lower(self, args)
1008 PyStringObject *self;
1009 PyObject *args;
1010{
1011 char *s = PyString_AS_STRING(self), *s_new;
1012 int i, n = PyString_GET_SIZE(self);
1013 PyObject *new;
1014
1015 if (!PyArg_ParseTuple(args, ""))
1016 return NULL;
1017 new = PyString_FromStringAndSize(NULL, n);
1018 if (new == NULL)
1019 return NULL;
1020 s_new = PyString_AsString(new);
1021 for (i = 0; i < n; i++) {
1022 int c = Py_CHARMASK(*s++);
1023 if (isupper(c)) {
1024 *s_new = tolower(c);
1025 } else
1026 *s_new = c;
1027 s_new++;
1028 }
1029 return new;
1030}
1031
1032
1033static char upper__doc__[] =
1034"S.upper() -> string\n\
1035\n\
1036Return a copy of the string S converted to uppercase.";
1037
1038static PyObject *
1039string_upper(self, args)
1040 PyStringObject *self;
1041 PyObject *args;
1042{
1043 char *s = PyString_AS_STRING(self), *s_new;
1044 int i, n = PyString_GET_SIZE(self);
1045 PyObject *new;
1046
1047 if (!PyArg_ParseTuple(args, ""))
1048 return NULL;
1049 new = PyString_FromStringAndSize(NULL, n);
1050 if (new == NULL)
1051 return NULL;
1052 s_new = PyString_AsString(new);
1053 for (i = 0; i < n; i++) {
1054 int c = Py_CHARMASK(*s++);
1055 if (islower(c)) {
1056 *s_new = toupper(c);
1057 } else
1058 *s_new = c;
1059 s_new++;
1060 }
1061 return new;
1062}
1063
1064
1065static char capitalize__doc__[] =
1066"S.capitalize() -> string\n\
1067\n\
1068Return a copy of the string S with only its first character\n\
1069capitalized.";
1070
1071static PyObject *
1072string_capitalize(self, args)
1073 PyStringObject *self;
1074 PyObject *args;
1075{
1076 char *s = PyString_AS_STRING(self), *s_new;
1077 int i, n = PyString_GET_SIZE(self);
1078 PyObject *new;
1079
1080 if (!PyArg_ParseTuple(args, ""))
1081 return NULL;
1082 new = PyString_FromStringAndSize(NULL, n);
1083 if (new == NULL)
1084 return NULL;
1085 s_new = PyString_AsString(new);
1086 if (0 < n) {
1087 int c = Py_CHARMASK(*s++);
1088 if (islower(c))
1089 *s_new = toupper(c);
1090 else
1091 *s_new = c;
1092 s_new++;
1093 }
1094 for (i = 1; i < n; i++) {
1095 int c = Py_CHARMASK(*s++);
1096 if (isupper(c))
1097 *s_new = tolower(c);
1098 else
1099 *s_new = c;
1100 s_new++;
1101 }
1102 return new;
1103}
1104
1105
1106static char count__doc__[] =
1107"S.count(sub[, start[, end]]) -> int\n\
1108\n\
1109Return the number of occurrences of substring sub in string\n\
1110S[start:end]. Optional arguments start and end are\n\
1111interpreted as in slice notation.";
1112
1113static PyObject *
1114string_count(self, args)
1115 PyStringObject *self;
1116 PyObject *args;
1117{
1118 char *s = PyString_AS_STRING(self), *sub;
1119 int len = PyString_GET_SIZE(self), n;
1120 int i = 0, last = INT_MAX;
1121 int m, r;
1122
1123 if (!PyArg_ParseTuple(args, "t#|ii", &sub, &n, &i, &last))
1124 return NULL;
1125 if (last > len)
1126 last = len;
1127 if (last < 0)
1128 last += len;
1129 if (last < 0)
1130 last = 0;
1131 if (i < 0)
1132 i += len;
1133 if (i < 0)
1134 i = 0;
1135 m = last + 1 - n;
1136 if (n == 0)
1137 return PyInt_FromLong((long) (m-i));
1138
1139 r = 0;
1140 while (i < m) {
1141 if (!memcmp(s+i, sub, n)) {
1142 r++;
1143 i += n;
1144 } else {
1145 i++;
1146 }
1147 }
1148 return PyInt_FromLong((long) r);
1149}
1150
1151
1152static char swapcase__doc__[] =
1153"S.swapcase() -> string\n\
1154\n\
1155Return a copy of the string S with upper case characters\n\
1156converted to lowercase and vice versa.";
1157
1158static PyObject *
1159string_swapcase(self, args)
1160 PyStringObject *self;
1161 PyObject *args;
1162{
1163 char *s = PyString_AS_STRING(self), *s_new;
1164 int i, n = PyString_GET_SIZE(self);
1165 PyObject *new;
1166
1167 if (!PyArg_ParseTuple(args, ""))
1168 return NULL;
1169 new = PyString_FromStringAndSize(NULL, n);
1170 if (new == NULL)
1171 return NULL;
1172 s_new = PyString_AsString(new);
1173 for (i = 0; i < n; i++) {
1174 int c = Py_CHARMASK(*s++);
1175 if (islower(c)) {
1176 *s_new = toupper(c);
1177 }
1178 else if (isupper(c)) {
1179 *s_new = tolower(c);
1180 }
1181 else
1182 *s_new = c;
1183 s_new++;
1184 }
1185 return new;
1186}
1187
1188
1189static char translate__doc__[] =
1190"S.translate(table [,deletechars]) -> string\n\
1191\n\
1192Return a copy of the string S, where all characters occurring\n\
1193in the optional argument deletechars are removed, and the\n\
1194remaining characters have been mapped through the given\n\
1195translation table, which must be a string of length 256.";
1196
1197static PyObject *
1198string_translate(self, args)
1199 PyStringObject *self;
1200 PyObject *args;
1201{
1202 register char *input, *table, *output;
1203 register int i, c, changed = 0;
1204 PyObject *input_obj = (PyObject*)self;
1205 char *table1, *output_start, *del_table=NULL;
1206 int inlen, tablen, dellen = 0;
1207 PyObject *result;
1208 int trans_table[256];
1209
1210 if (!PyArg_ParseTuple(args, "t#|t#",
1211 &table1, &tablen, &del_table, &dellen))
1212 return NULL;
1213 if (tablen != 256) {
1214 PyErr_SetString(PyExc_ValueError,
1215 "translation table must be 256 characters long");
1216 return NULL;
1217 }
1218
1219 table = table1;
1220 inlen = PyString_Size(input_obj);
1221 result = PyString_FromStringAndSize((char *)NULL, inlen);
1222 if (result == NULL)
1223 return NULL;
1224 output_start = output = PyString_AsString(result);
1225 input = PyString_AsString(input_obj);
1226
1227 if (dellen == 0) {
1228 /* If no deletions are required, use faster code */
1229 for (i = inlen; --i >= 0; ) {
1230 c = Py_CHARMASK(*input++);
1231 if (Py_CHARMASK((*output++ = table[c])) != c)
1232 changed = 1;
1233 }
1234 if (changed)
1235 return result;
1236 Py_DECREF(result);
1237 Py_INCREF(input_obj);
1238 return input_obj;
1239 }
1240
1241 for (i = 0; i < 256; i++)
1242 trans_table[i] = Py_CHARMASK(table[i]);
1243
1244 for (i = 0; i < dellen; i++)
1245 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1246
1247 for (i = inlen; --i >= 0; ) {
1248 c = Py_CHARMASK(*input++);
1249 if (trans_table[c] != -1)
1250 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1251 continue;
1252 changed = 1;
1253 }
1254 if (!changed) {
1255 Py_DECREF(result);
1256 Py_INCREF(input_obj);
1257 return input_obj;
1258 }
1259 /* Fix the size of the resulting string */
1260 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1261 return NULL;
1262 return result;
1263}
1264
1265
1266/* What follows is used for implementing replace(). Perry Stoll. */
1267
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM (LEN
  bytes) of the contents of memory pointed to by PAT (PAT_LEN bytes).
  Returns the index into MEM if found, or -1 if not found.  If the
  length of PAT is greater than the length of MEM, the function
  returns -1.

  An empty (or negative-length) pattern is reported as not found.
  Without that guard the search would read pat[0] and mem[len] and hand
  memcmp() a length of -1; it also guarantees that a caller advancing by
  PAT_LEN after each hit always makes progress.
*/
static int
mymemfind(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    register int ii;

    if (pat_len <= 0)
        return -1;

    /* pattern can not occur in the last pat_len-1 chars */
    len -= pat_len;

    for (ii = 0; ii <= len; ii++) {
        /* Cheap first-byte test before comparing the remainder. */
        if (mem[ii] == pat[0] &&
            (pat_len == 1 ||
             memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
            return ii;
        }
    }
    return -1;
}
1299
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT (PAT_LEN bytes) in MEM
  (LEN bytes), scanning left to right and resuming right after each
  match.  For example mem=1111 with pat=11 gives 2, and mem=11111 with
  pat=11 gives 2 as well.
 */
static int
mymemcnt(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    int hits = 0;
    int at, consumed;

    while (len >= 0 &&
           (at = mymemfind(mem, len, pat, pat_len)) != -1) {
        /* Step over the unmatched prefix and the match itself. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1327
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made when COUNT
   is positive; COUNT == 0 replaces every occurrence.

   If STR is empty, if PAT is longer than STR, or if there are no
   occurrences of PAT in STR, then the original string is returned.
   Otherwise, a new buffer is allocated here with malloc() and
   returned; the caller must free() it.  The buffer is not
   NUL-terminated.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
    char *str;
    int len;     /* input string */
    char *pat;
    int pat_len; /* pattern string to find */
    char *sub;
    int sub_len; /* substitution string */
    int count;   /* number of replacements, 0 == all */
    int *out_len;

{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count > 0)
        nfound = nfound > count ? count : nfound;
    if (nfound == 0)
        goto return_same;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result can be empty (e.g. the whole of STR replaced by an
       empty SUB).  malloc(0) is allowed to return NULL, which would be
       mistaken for an out-of-memory error, so always ask for at least
       one byte. */
    new_s = (char *)malloc(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return str;
}
1408
1409
1410static char replace__doc__[] =
1411"S.replace (old, new[, maxsplit]) -> string\n\
1412\n\
1413Return a copy of string S with all occurrences of substring\n\
1414old replaced by new. If the optional argument maxsplit is\n\
1415given, only the first maxsplit occurrences are replaced.";
1416
1417static PyObject *
1418string_replace(self, args)
1419 PyStringObject *self;
1420 PyObject *args;
1421{
1422 char *str = PyString_AS_STRING(self), *pat,*sub,*new_s;
1423 int len = PyString_GET_SIZE(self), pat_len,sub_len,out_len;
1424 int count = 0;
1425 PyObject *new;
1426
1427 if (!PyArg_ParseTuple(args, "t#t#|i",
1428 &pat, &pat_len, &sub, &sub_len, &count))
1429 return NULL;
1430 if (pat_len <= 0) {
1431 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1432 return NULL;
1433 }
1434 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1435 if (new_s == NULL) {
1436 PyErr_NoMemory();
1437 return NULL;
1438 }
1439 if (out_len == -1) {
1440 /* we're returning another reference to self */
1441 new = (PyObject*)self;
1442 Py_INCREF(new);
1443 }
1444 else {
1445 new = PyString_FromStringAndSize(new_s, out_len);
1446 free(new_s);
1447 }
1448 return new;
1449}
1450
1451
1452static char startswith__doc__[] =
1453"S.startswith(prefix[, start[, end]]) -> int\n\
1454\n\
1455Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1456optional start, test S beginning at that position. With optional end, stop\n\
1457comparing S at that position.";
1458
1459static PyObject *
1460string_startswith(self, args)
1461 PyStringObject *self;
1462 PyObject *args;
1463{
1464 char* str = PyString_AS_STRING(self);
1465 int len = PyString_GET_SIZE(self);
1466 char* prefix;
1467 int plen;
1468 int start = 0;
1469 int end = -1;
1470
1471 if (!PyArg_ParseTuple(args, "t#|ii", &prefix, &plen, &start, &end))
1472 return NULL;
1473
1474 /* adopt Java semantics for index out of range. it is legal for
1475 * offset to be == plen, but this only returns true if prefix is
1476 * the empty string.
1477 */
1478 if (start < 0 || start+plen > len)
1479 return PyInt_FromLong(0);
1480
1481 if (!memcmp(str+start, prefix, plen)) {
1482 /* did the match end after the specified end? */
1483 if (end < 0)
1484 return PyInt_FromLong(1);
1485 else if (end - start < plen)
1486 return PyInt_FromLong(0);
1487 else
1488 return PyInt_FromLong(1);
1489 }
1490 else return PyInt_FromLong(0);
1491}
1492
1493
1494static char endswith__doc__[] =
1495"S.endswith(suffix[, start[, end]]) -> int\n\
1496\n\
1497Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1498optional start, test S beginning at that position. With optional end, stop\n\
1499comparing S at that position.";
1500
1501static PyObject *
1502string_endswith(self, args)
1503 PyStringObject *self;
1504 PyObject *args;
1505{
1506 char* str = PyString_AS_STRING(self);
1507 int len = PyString_GET_SIZE(self);
1508 char* suffix;
1509 int plen;
1510 int start = 0;
1511 int end = -1;
1512 int lower, upper;
1513
1514 if (!PyArg_ParseTuple(args, "t#|ii", &suffix, &plen, &start, &end))
1515 return NULL;
1516
1517 if (start < 0 || start > len || plen > len)
1518 return PyInt_FromLong(0);
1519
1520 upper = (end >= 0 && end <= len) ? end : len;
1521 lower = (upper - plen) > start ? (upper - plen) : start;
1522
1523 if (upper-lower >= plen && !memcmp(str+lower, suffix, plen))
1524 return PyInt_FromLong(1);
1525 else return PyInt_FromLong(0);
1526}
1527
1528
1529
1530static PyMethodDef
1531string_methods[] = {
1532 /* counterparts of the obsolete stropmodule functions */
1533 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
1534 {"count", (PyCFunction)string_count, 1, count__doc__},
1535 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
1536 {"find", (PyCFunction)string_find, 1, find__doc__},
1537 {"index", (PyCFunction)string_index, 1, index__doc__},
1538 {"join", (PyCFunction)string_join, 1, join__doc__},
1539 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
1540 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
1541 /* maketrans */
1542 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
1543 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
1544 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
1545 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
1546 {"split", (PyCFunction)string_split, 1, split__doc__},
1547 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
1548 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
1549 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
1550 {"translate", (PyCFunction)string_translate, 1, strip__doc__},
1551 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
1552 /* TBD */
1553/* {"ljust" (PyCFunction)string_ljust, 1, ljust__doc__}, */
1554/* {"rjust" (PyCFunction)string_rjust, 1, rjust__doc__}, */
1555/* {"center" (PyCFunction)string_center, 1, center__doc__}, */
1556/* {"zfill" (PyCFunction)string_zfill, 1, zfill__doc__}, */
1557/* {"expandtabs" (PyCFunction)string_expandtabs, 1, ljust__doc__}, */
1558/* {"capwords" (PyCFunction)string_capwords, 1, capwords__doc__}, */
1559 {NULL, NULL} /* sentinel */
1560};
1561
1562static PyObject *
1563string_getattr(s, name)
1564 PyStringObject *s;
1565 char *name;
1566{
1567 return Py_FindMethod(string_methods, (PyObject*)s, name);
1568}
1569
1570
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001571PyTypeObject PyString_Type = {
1572 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001573 0,
1574 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001575 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001576 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00001577 (destructor)string_dealloc, /*tp_dealloc*/
1578 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001580 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001581 (cmpfunc)string_compare, /*tp_compare*/
1582 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001583 0, /*tp_as_number*/
1584 &string_as_sequence, /*tp_as_sequence*/
1585 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001586 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00001587 0, /*tp_call*/
1588 0, /*tp_str*/
1589 0, /*tp_getattro*/
1590 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001591 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00001592 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00001593 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001594};
1595
1596void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001597PyString_Concat(pv, w)
1598 register PyObject **pv;
1599 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001600{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001601 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00001602 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001603 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001604 if (w == NULL || !PyString_Check(*pv)) {
1605 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00001606 *pv = NULL;
1607 return;
1608 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001609 v = string_concat((PyStringObject *) *pv, w);
1610 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001611 *pv = v;
1612}
1613
Guido van Rossum013142a1994-08-30 08:19:36 +00001614void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001615PyString_ConcatAndDel(pv, w)
1616 register PyObject **pv;
1617 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00001618{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001619 PyString_Concat(pv, w);
1620 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00001621}
1622
1623
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001624/* The following function breaks the notion that strings are immutable:
1625 it changes the size of a string. We get away with this only if there
1626 is only one module referencing the object. You can also think of it
1627 as creating a new string object and destroying the old one, only
1628 more efficiently. In any case, don't use this if the string may
1629 already be known to some other part of the code... */
1630
1631int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001632_PyString_Resize(pv, newsize)
1633 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001634 int newsize;
1635{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001636 register PyObject *v;
1637 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00001638 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001639 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001640 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001641 Py_DECREF(v);
1642 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001643 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001644 }
Guido van Rossum921842f1990-11-18 17:30:23 +00001645 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00001646#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00001647 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00001648#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001649 _Py_ForgetReference(v);
1650 *pv = (PyObject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001651 realloc((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001652 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001653 if (*pv == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001654 PyMem_DEL(v);
1655 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001656 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001657 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001658 _Py_NewReference(*pv);
1659 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00001660 sv->ob_size = newsize;
1661 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001662 return 0;
1663}
Guido van Rossume5372401993-03-16 12:15:04 +00001664
1665/* Helpers for formatstring */
1666
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001667static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00001668getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001669 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00001670 int arglen;
1671 int *p_argidx;
1672{
1673 int argidx = *p_argidx;
1674 if (argidx < arglen) {
1675 (*p_argidx)++;
1676 if (arglen < 0)
1677 return args;
1678 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001679 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00001680 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001681 PyErr_SetString(PyExc_TypeError,
1682 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00001683 return NULL;
1684}
1685
/* Bit flags collected from the flag characters of a format
   specifier ('-', '+', ' ', '#', '0'). */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
1691
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001692static int
1693formatfloat(buf, flags, prec, type, v)
1694 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00001695 int flags;
1696 int prec;
1697 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001698 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00001699{
1700 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00001701 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001702 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001703 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00001704 if (prec < 0)
1705 prec = 6;
1706 if (prec > 50)
1707 prec = 50; /* Arbitrary limitation */
1708 if (type == 'f' && fabs(x)/1e25 >= 1e25)
1709 type = 'g';
1710 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
1711 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001712 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00001713}
1714
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001715static int
1716formatint(buf, flags, prec, type, v)
1717 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00001718 int flags;
1719 int prec;
1720 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001721 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00001722{
1723 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00001724 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001725 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001726 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00001727 if (prec < 0)
1728 prec = 1;
1729 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
1730 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001731 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00001732}
1733
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001734static int
1735formatchar(buf, v)
1736 char *buf;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001737 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00001738{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001739 if (PyString_Check(v)) {
1740 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001741 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00001742 }
1743 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001744 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001745 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00001746 }
1747 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001748 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00001749}
1750
Guido van Rossum013142a1994-08-30 08:19:36 +00001751
Guido van Rossume5372401993-03-16 12:15:04 +00001752/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
1753
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001754PyObject *
1755PyString_Format(format, args)
1756 PyObject *format;
1757 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00001758{
1759 char *fmt, *res;
1760 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00001761 int args_owned = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001762 PyObject *result;
1763 PyObject *dict = NULL;
1764 if (format == NULL || !PyString_Check(format) || args == NULL) {
1765 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00001766 return NULL;
1767 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001768 fmt = PyString_AsString(format);
1769 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00001770 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001771 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00001772 if (result == NULL)
1773 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001774 res = PyString_AsString(result);
1775 if (PyTuple_Check(args)) {
1776 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00001777 argidx = 0;
1778 }
1779 else {
1780 arglen = -1;
1781 argidx = -2;
1782 }
Guido van Rossum013142a1994-08-30 08:19:36 +00001783 if (args->ob_type->tp_as_mapping)
1784 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00001785 while (--fmtcnt >= 0) {
1786 if (*fmt != '%') {
1787 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00001788 rescnt = fmtcnt + 100;
1789 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001790 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00001791 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001792 res = PyString_AsString(result)
1793 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00001794 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00001795 }
1796 *res++ = *fmt++;
1797 }
1798 else {
1799 /* Got a format specifier */
1800 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00001801 int width = -1;
1802 int prec = -1;
1803 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00001804 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00001805 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001806 PyObject *v = NULL;
1807 PyObject *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +00001808 char *buf;
1809 int sign;
1810 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001811 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossumda9c2711996-12-05 21:58:58 +00001812 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00001813 if (*fmt == '(') {
1814 char *keystart;
1815 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001816 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00001817 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00001818
1819 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001820 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00001821 "format requires a mapping");
1822 goto error;
1823 }
1824 ++fmt;
1825 --fmtcnt;
1826 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00001827 /* Skip over balanced parentheses */
1828 while (pcount > 0 && --fmtcnt >= 0) {
1829 if (*fmt == ')')
1830 --pcount;
1831 else if (*fmt == '(')
1832 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00001833 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00001834 }
1835 keylen = fmt - keystart - 1;
1836 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001837 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00001838 "incomplete format key");
1839 goto error;
1840 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001841 key = PyString_FromStringAndSize(keystart,
1842 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00001843 if (key == NULL)
1844 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00001845 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001846 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00001847 args_owned = 0;
1848 }
1849 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001850 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00001851 if (args == NULL) {
1852 goto error;
1853 }
Guido van Rossum993952b1996-05-21 22:44:20 +00001854 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00001855 arglen = -1;
1856 argidx = -2;
1857 }
Guido van Rossume5372401993-03-16 12:15:04 +00001858 while (--fmtcnt >= 0) {
1859 switch (c = *fmt++) {
1860 case '-': flags |= F_LJUST; continue;
1861 case '+': flags |= F_SIGN; continue;
1862 case ' ': flags |= F_BLANK; continue;
1863 case '#': flags |= F_ALT; continue;
1864 case '0': flags |= F_ZERO; continue;
1865 }
1866 break;
1867 }
1868 if (c == '*') {
1869 v = getnextarg(args, arglen, &argidx);
1870 if (v == NULL)
1871 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001872 if (!PyInt_Check(v)) {
1873 PyErr_SetString(PyExc_TypeError,
1874 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00001875 goto error;
1876 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001877 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00001878 if (width < 0) {
1879 flags |= F_LJUST;
1880 width = -width;
1881 }
Guido van Rossume5372401993-03-16 12:15:04 +00001882 if (--fmtcnt >= 0)
1883 c = *fmt++;
1884 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00001885 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00001886 width = c - '0';
1887 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00001888 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00001889 if (!isdigit(c))
1890 break;
1891 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001892 PyErr_SetString(
1893 PyExc_ValueError,
1894 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00001895 goto error;
1896 }
1897 width = width*10 + (c - '0');
1898 }
1899 }
1900 if (c == '.') {
1901 prec = 0;
1902 if (--fmtcnt >= 0)
1903 c = *fmt++;
1904 if (c == '*') {
1905 v = getnextarg(args, arglen, &argidx);
1906 if (v == NULL)
1907 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001908 if (!PyInt_Check(v)) {
1909 PyErr_SetString(
1910 PyExc_TypeError,
1911 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00001912 goto error;
1913 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001914 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00001915 if (prec < 0)
1916 prec = 0;
1917 if (--fmtcnt >= 0)
1918 c = *fmt++;
1919 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00001920 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00001921 prec = c - '0';
1922 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00001923 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00001924 if (!isdigit(c))
1925 break;
1926 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001927 PyErr_SetString(
1928 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00001929 "prec too big");
1930 goto error;
1931 }
1932 prec = prec*10 + (c - '0');
1933 }
1934 }
1935 } /* prec */
1936 if (fmtcnt >= 0) {
1937 if (c == 'h' || c == 'l' || c == 'L') {
1938 size = c;
1939 if (--fmtcnt >= 0)
1940 c = *fmt++;
1941 }
1942 }
1943 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001944 PyErr_SetString(PyExc_ValueError,
1945 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00001946 goto error;
1947 }
1948 if (c != '%') {
1949 v = getnextarg(args, arglen, &argidx);
1950 if (v == NULL)
1951 goto error;
1952 }
1953 sign = 0;
1954 fill = ' ';
1955 switch (c) {
1956 case '%':
1957 buf = "%";
1958 len = 1;
1959 break;
1960 case 's':
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001961 temp = PyObject_Str(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00001962 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00001963 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00001964 if (!PyString_Check(temp)) {
1965 PyErr_SetString(PyExc_TypeError,
1966 "%s argument has non-string str()");
1967 goto error;
1968 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001969 buf = PyString_AsString(temp);
1970 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00001971 if (prec >= 0 && len > prec)
1972 len = prec;
1973 break;
1974 case 'i':
1975 case 'd':
1976 case 'u':
1977 case 'o':
1978 case 'x':
1979 case 'X':
1980 if (c == 'i')
1981 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00001982 buf = tmpbuf;
1983 len = formatint(buf, flags, prec, c, v);
1984 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00001985 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00001986 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00001987 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00001988 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00001989 if ((flags&F_ALT) &&
1990 (c == 'x' || c == 'X') &&
1991 buf[0] == '0' && buf[1] == c) {
1992 *res++ = *buf++;
1993 *res++ = *buf++;
1994 rescnt -= 2;
1995 len -= 2;
1996 width -= 2;
1997 if (width < 0)
1998 width = 0;
1999 }
2000 }
Guido van Rossume5372401993-03-16 12:15:04 +00002001 break;
2002 case 'e':
2003 case 'E':
2004 case 'f':
2005 case 'g':
2006 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002007 buf = tmpbuf;
2008 len = formatfloat(buf, flags, prec, c, v);
2009 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002010 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002011 sign = 1;
2012 if (flags&F_ZERO)
2013 fill = '0';
2014 break;
2015 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002016 buf = tmpbuf;
2017 len = formatchar(buf, v);
2018 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002019 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002020 break;
2021 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002022 PyErr_Format(PyExc_ValueError,
2023 "unsupported format character '%c' (0x%x)",
2024 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002025 goto error;
2026 }
2027 if (sign) {
2028 if (*buf == '-' || *buf == '+') {
2029 sign = *buf++;
2030 len--;
2031 }
2032 else if (flags & F_SIGN)
2033 sign = '+';
2034 else if (flags & F_BLANK)
2035 sign = ' ';
2036 else
2037 sign = '\0';
2038 }
2039 if (width < len)
2040 width = len;
2041 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002042 reslen -= rescnt;
2043 rescnt = width + fmtcnt + 100;
2044 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002045 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002046 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002047 res = PyString_AsString(result)
2048 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002049 }
2050 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002051 if (fill != ' ')
2052 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002053 rescnt--;
2054 if (width > len)
2055 width--;
2056 }
2057 if (width > len && !(flags&F_LJUST)) {
2058 do {
2059 --rescnt;
2060 *res++ = fill;
2061 } while (--width > len);
2062 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002063 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002064 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002065 memcpy(res, buf, len);
2066 res += len;
2067 rescnt -= len;
2068 while (--width >= len) {
2069 --rescnt;
2070 *res++ = ' ';
2071 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002072 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002073 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002074 "not all arguments converted");
2075 goto error;
2076 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002077 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002078 } /* '%' */
2079 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002080 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002081 PyErr_SetString(PyExc_TypeError,
2082 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002083 goto error;
2084 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002085 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002086 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002087 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002088 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002089 return result;
2090 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002091 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002092 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002093 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002094 }
Guido van Rossume5372401993-03-16 12:15:04 +00002095 return NULL;
2096}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002097
2098
2099#ifdef INTERN_STRINGS
2100
2101static PyObject *interned;
2102
2103void
2104PyString_InternInPlace(p)
2105 PyObject **p;
2106{
2107 register PyStringObject *s = (PyStringObject *)(*p);
2108 PyObject *t;
2109 if (s == NULL || !PyString_Check(s))
2110 Py_FatalError("PyString_InternInPlace: strings only please!");
2111 if ((t = s->ob_sinterned) != NULL) {
2112 if (t == (PyObject *)s)
2113 return;
2114 Py_INCREF(t);
2115 *p = t;
2116 Py_DECREF(s);
2117 return;
2118 }
2119 if (interned == NULL) {
2120 interned = PyDict_New();
2121 if (interned == NULL)
2122 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002123 }
2124 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2125 Py_INCREF(t);
2126 *p = s->ob_sinterned = t;
2127 Py_DECREF(s);
2128 return;
2129 }
2130 t = (PyObject *)s;
2131 if (PyDict_SetItem(interned, t, t) == 0) {
2132 s->ob_sinterned = t;
2133 return;
2134 }
2135 PyErr_Clear();
2136}
2137
2138
2139PyObject *
2140PyString_InternFromString(cp)
2141 const char *cp;
2142{
2143 PyObject *s = PyString_FromString(cp);
2144 if (s == NULL)
2145 return NULL;
2146 PyString_InternInPlace(&s);
2147 return s;
2148}
2149
2150#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002151
2152void
2153PyString_Fini()
2154{
2155 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002156 for (i = 0; i < UCHAR_MAX + 1; i++) {
2157 Py_XDECREF(characters[i]);
2158 characters[i] = NULL;
2159 }
2160#ifndef DONT_SHARE_SHORT_STRINGS
2161 Py_XDECREF(nullstring);
2162 nullstring = NULL;
2163#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002164#ifdef INTERN_STRINGS
2165 if (interned) {
2166 int pos, changed;
2167 PyObject *key, *value;
2168 do {
2169 changed = 0;
2170 pos = 0;
2171 while (PyDict_Next(interned, &pos, &key, &value)) {
2172 if (key->ob_refcnt == 2 && key == value) {
2173 PyDict_DelItem(interned, key);
2174 changed = 1;
2175 }
2176 }
2177 } while (changed);
2178 }
2179#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002180}