blob: 5b5ed9c40efa4ee4edb93573bc57a3e0c137ef28 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum71160aa1997-06-03 18:03:18 +000036#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000037#include <ctype.h>
38
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000039#ifdef COUNT_ALLOCS
40int null_strings, one_strings;
41#endif
42
Guido van Rossum03093a21994-09-28 15:51:32 +000043#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000044#include <limits.h>
45#else
46#ifndef UCHAR_MAX
47#define UCHAR_MAX 255
48#endif
49#endif
50
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000052#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055
/*
   PyString_FromStringAndSize() and PyString_FromString() (which this
   comment originally called newsizedstringobject() and
   newstringobject()) try in certain cases to share string objects.
   When the size of the string is zero, these routines always return a
   pointer to the same string object; when the size is one, they return
   a pointer to an already existing object if the contents of the
   string are known.  For PyString_FromString() this is always the
   case; for PyString_FromStringAndSize() this is the case when the
   first argument is not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071PyObject *
72PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000073 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074 int size;
75{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000077#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
92 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 op = (PyStringObject *)
96 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000097 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return PyErr_NoMemory();
99 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000100 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101#ifdef CACHE_HASH
102 op->ob_shash = -1;
103#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000104#ifdef INTERN_STRINGS
105 op->ob_sinterned = NULL;
106#endif
Guido van Rossumbffd6832000-01-20 22:32:56 +0000107 _Py_NewReference((PyObject *)op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000108 if (str != NULL)
109 memcpy(op->ob_sval, str, size);
110 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0) {
113 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 } else if (size == 1 && str != NULL) {
116 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121}
122
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123PyObject *
124PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000125 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126{
127 register unsigned int size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000129#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 if (size == 0 && (op = nullstring) != NULL) {
131#ifdef COUNT_ALLOCS
132 null_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138#ifdef COUNT_ALLOCS
139 one_strings++;
140#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000144#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000145 op = (PyStringObject *)
146 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000147 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return PyErr_NoMemory();
149 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000150 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151#ifdef CACHE_HASH
152 op->ob_shash = -1;
153#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000154#ifdef INTERN_STRINGS
155 op->ob_sinterned = NULL;
156#endif
Guido van Rossumbffd6832000-01-20 22:32:56 +0000157 _Py_NewReference((PyObject *)op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000158 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000159#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 if (size == 0) {
161 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000162 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000163 } else if (size == 1) {
164 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000165 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000166 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000167#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000168 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169}
170
Guido van Rossum234f9421993-06-17 12:35:49 +0000171static void
Guido van Rossume5372401993-03-16 12:15:04 +0000172string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000173 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000174{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000175 PyMem_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000176}
177
Guido van Rossumd7047b31995-01-02 19:07:15 +0000178int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000179PyString_Size(op)
180 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000182 if (!PyString_Check(op)) {
183 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184 return -1;
185 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000186 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000187}
188
189/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000190PyString_AsString(op)
191 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000193 if (!PyString_Check(op)) {
194 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 return NULL;
196 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000197 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198}
199
200/* Methods */
201
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000202static int
Guido van Rossume5372401993-03-16 12:15:04 +0000203string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000204 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 FILE *fp;
206 int flags;
207{
208 int i;
209 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000210 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000212 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000214 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216
217 /* figure out which quote to use; single is prefered */
218 quote = '\'';
219 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
220 quote = '"';
221
222 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 for (i = 0; i < op->ob_size; i++) {
224 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 fprintf(fp, "\\%c", c);
227 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000228 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000232 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000233 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234}
235
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000236static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000237string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239{
240 /* XXX overflow? */
241 int newsize = 2 + 4 * op->ob_size * sizeof(char);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000242 PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000244 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000245 }
246 else {
247 register int i;
248 register char c;
249 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000250 int quote;
251
252 /* figure out which quote to use; single is prefered */
253 quote = '\'';
254 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
255 quote = '"';
256
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000257 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 for (i = 0; i < op->ob_size; i++) {
260 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 *p++ = '\\', *p++ = c;
263 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000264 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 while (*p != '\0')
266 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 }
268 else
269 *p++ = c;
270 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000271 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000273 _PyString_Resize(
274 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000275 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277}
278
279static int
Guido van Rossume5372401993-03-16 12:15:04 +0000280string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282{
283 return a->ob_size;
284}
285
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000286static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000287string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000288 register PyStringObject *a;
289 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290{
291 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 register PyStringObject *op;
293 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000294 if (PyUnicode_Check(bb))
295 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000296 PyErr_BadArgument();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297 return NULL;
298 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000299#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 /* Optimize cases with empty left or right operand */
301 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000302 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303 return bb;
304 }
305 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000306 Py_INCREF(a);
307 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000308 }
309 size = a->ob_size + b->ob_size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000310 op = (PyStringObject *)
311 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000312 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000313 return PyErr_NoMemory();
314 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000315 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000316#ifdef CACHE_HASH
317 op->ob_shash = -1;
318#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000319#ifdef INTERN_STRINGS
320 op->ob_sinterned = NULL;
321#endif
Guido van Rossumbffd6832000-01-20 22:32:56 +0000322 _Py_NewReference((PyObject *)op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000323 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
324 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
325 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000326 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000327#undef b
328}
329
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000330static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000331string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000332 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333 register int n;
334{
335 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000336 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000337 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 if (n < 0)
339 n = 0;
340 size = a->ob_size * n;
341 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000342 Py_INCREF(a);
343 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 op = (PyStringObject *)
346 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000347 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000348 return PyErr_NoMemory();
349 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000350 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000351#ifdef CACHE_HASH
352 op->ob_shash = -1;
353#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000354#ifdef INTERN_STRINGS
355 op->ob_sinterned = NULL;
356#endif
Guido van Rossumbffd6832000-01-20 22:32:56 +0000357 _Py_NewReference((PyObject *)op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000358 for (i = 0; i < size; i += a->ob_size)
359 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
360 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000361 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362}
363
364/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
365
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000367string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000368 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000369 register int i, j; /* May be negative! */
370{
371 if (i < 0)
372 i = 0;
373 if (j < 0)
374 j = 0; /* Avoid signed/unsigned bug in next line */
375 if (j > a->ob_size)
376 j = a->ob_size;
377 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000378 Py_INCREF(a);
379 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380 }
381 if (j < i)
382 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000383 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000384}
385
Guido van Rossum9284a572000-03-07 15:53:43 +0000386static int
387string_contains(a, el)
388PyObject *a, *el;
389{
390 register char *s, *end;
391 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000392 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000393 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000394 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000395 PyErr_SetString(PyExc_TypeError,
396 "string member test needs char left operand");
397 return -1;
398 }
399 c = PyString_AsString(el)[0];
400 s = PyString_AsString(a);
401 end = s + PyString_Size(a);
402 while (s < end) {
403 if (c == *s++)
404 return 1;
405 }
406 return 0;
407}
408
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000410string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000411 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 register int i;
413{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000414 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000418 return NULL;
419 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000420 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000422#ifdef COUNT_ALLOCS
423 if (v != NULL)
424 one_strings++;
425#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000426 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000428 if (v == NULL)
429 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430 characters[c] = (PyStringObject *) v;
431 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000432 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000434 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435}
436
437static int
Guido van Rossume5372401993-03-16 12:15:04 +0000438string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440{
Guido van Rossum253919f1991-02-13 23:18:39 +0000441 int len_a = a->ob_size, len_b = b->ob_size;
442 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000443 int cmp;
444 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000445 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000446 if (cmp == 0)
447 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
448 if (cmp != 0)
449 return cmp;
450 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000451 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000452}
453
Guido van Rossum9bfef441993-03-29 10:43:31 +0000454static long
455string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000456 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000457{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000458 register int len;
459 register unsigned char *p;
460 register long x;
461
462#ifdef CACHE_HASH
463 if (a->ob_shash != -1)
464 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000465#ifdef INTERN_STRINGS
466 if (a->ob_sinterned != NULL)
467 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000468 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000469#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000470#endif
471 len = a->ob_size;
472 p = (unsigned char *) a->ob_sval;
473 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000474 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000475 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000476 x ^= a->ob_size;
477 if (x == -1)
478 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000479#ifdef CACHE_HASH
480 a->ob_shash = x;
481#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000482 return x;
483}
484
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000485static int
486string_buffer_getreadbuf(self, index, ptr)
487 PyStringObject *self;
488 int index;
489 const void **ptr;
490{
491 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000492 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000493 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000494 return -1;
495 }
496 *ptr = (void *)self->ob_sval;
497 return self->ob_size;
498}
499
500static int
501string_buffer_getwritebuf(self, index, ptr)
502 PyStringObject *self;
503 int index;
504 const void **ptr;
505{
Guido van Rossum045e6881997-09-08 18:30:11 +0000506 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000507 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000508 return -1;
509}
510
511static int
512string_buffer_getsegcount(self, lenp)
513 PyStringObject *self;
514 int *lenp;
515{
516 if ( lenp )
517 *lenp = self->ob_size;
518 return 1;
519}
520
Guido van Rossum1db70701998-10-08 02:18:52 +0000521static int
522string_buffer_getcharbuf(self, index, ptr)
523 PyStringObject *self;
524 int index;
525 const char **ptr;
526{
527 if ( index != 0 ) {
528 PyErr_SetString(PyExc_SystemError,
529 "accessing non-existent string segment");
530 return -1;
531 }
532 *ptr = self->ob_sval;
533 return self->ob_size;
534}
535
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000536static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000537 (inquiry)string_length, /*sq_length*/
538 (binaryfunc)string_concat, /*sq_concat*/
539 (intargfunc)string_repeat, /*sq_repeat*/
540 (intargfunc)string_item, /*sq_item*/
541 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000542 0, /*sq_ass_item*/
543 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000544 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000545};
546
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000547static PyBufferProcs string_as_buffer = {
548 (getreadbufferproc)string_buffer_getreadbuf,
549 (getwritebufferproc)string_buffer_getwritebuf,
550 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000551 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000552};
553
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000554
555
556#define LEFTSTRIP 0
557#define RIGHTSTRIP 1
558#define BOTHSTRIP 2
559
560
561static PyObject *
562split_whitespace(s, len, maxsplit)
563 char *s;
564 int len;
565 int maxsplit;
566{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000567 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000568 PyObject* item;
569 PyObject *list = PyList_New(0);
570
571 if (list == NULL)
572 return NULL;
573
Guido van Rossum4c08d552000-03-10 22:55:18 +0000574 for (i = j = 0; i < len; ) {
575 while (i < len && isspace(Py_CHARMASK(s[i])))
576 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000577 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000578 while (i < len && !isspace(Py_CHARMASK(s[i])))
579 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000580 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000581 if (maxsplit-- <= 0)
582 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000583 item = PyString_FromStringAndSize(s+j, (int)(i-j));
584 if (item == NULL)
585 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000586 err = PyList_Append(list, item);
587 Py_DECREF(item);
588 if (err < 0)
589 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000590 while (i < len && isspace(Py_CHARMASK(s[i])))
591 i++;
592 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000593 }
594 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000595 if (j < len) {
596 item = PyString_FromStringAndSize(s+j, (int)(len - j));
597 if (item == NULL)
598 goto finally;
599 err = PyList_Append(list, item);
600 Py_DECREF(item);
601 if (err < 0)
602 goto finally;
603 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000604 return list;
605 finally:
606 Py_DECREF(list);
607 return NULL;
608}
609
610
/* Documentation string for the split() string method. */
static char split__doc__[] =
	"S.split([sep [,maxsplit]]) -> list of strings\n"
	"\n"
	"Return a list of the words in the string S, using sep as the\n"
	"delimiter string. If maxsplit is given, at most maxsplit\n"
	"splits are done. If sep is not specified, any whitespace string\n"
	"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000618
619static PyObject *
620string_split(self, args)
621 PyStringObject *self;
622 PyObject *args;
623{
624 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 int maxsplit = -1;
626 const char *s = PyString_AS_STRING(self), *sub;
627 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000628
Guido van Rossum4c08d552000-03-10 22:55:18 +0000629 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000631 if (maxsplit < 0)
632 maxsplit = INT_MAX;
633 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000634 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000635 if (PyString_Check(subobj)) {
636 sub = PyString_AS_STRING(subobj);
637 n = PyString_GET_SIZE(subobj);
638 }
639 else if (PyUnicode_Check(subobj))
640 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
641 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
642 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000643 if (n == 0) {
644 PyErr_SetString(PyExc_ValueError, "empty separator");
645 return NULL;
646 }
647
648 list = PyList_New(0);
649 if (list == NULL)
650 return NULL;
651
652 i = j = 0;
653 while (i+n <= len) {
654 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000655 if (maxsplit-- <= 0)
656 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000657 item = PyString_FromStringAndSize(s+j, (int)(i-j));
658 if (item == NULL)
659 goto fail;
660 err = PyList_Append(list, item);
661 Py_DECREF(item);
662 if (err < 0)
663 goto fail;
664 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665 }
666 else
667 i++;
668 }
669 item = PyString_FromStringAndSize(s+j, (int)(len-j));
670 if (item == NULL)
671 goto fail;
672 err = PyList_Append(list, item);
673 Py_DECREF(item);
674 if (err < 0)
675 goto fail;
676
677 return list;
678
679 fail:
680 Py_DECREF(list);
681 return NULL;
682}
683
684
/* Documentation string for the join() string method. */
static char join__doc__[] =
	"S.join(sequence) -> string\n"
	"\n"
	"Return a string which is the concatenation of the strings in the\n"
	"sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690
691static PyObject *
692string_join(self, args)
693 PyStringObject *self;
694 PyObject *args;
695{
696 char *sep = PyString_AS_STRING(self);
697 int seplen = PyString_GET_SIZE(self);
698 PyObject *res = NULL;
699 int reslen = 0;
700 char *p;
701 int seqlen = 0;
702 int sz = 100;
703 int i, slen;
704 PyObject *seq;
705
Guido van Rossum43713e52000-02-29 13:59:29 +0000706 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000707 return NULL;
708
709 seqlen = PySequence_Length(seq);
710 if (seqlen < 0 && PyErr_Occurred())
711 return NULL;
712
713 if (seqlen == 1) {
714 /* Optimization if there's only one item */
715 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000716 if (item == NULL)
717 return NULL;
718 if (!PyString_Check(item) &&
719 !PyUnicode_Check(item)) {
720 PyErr_SetString(PyExc_TypeError,
721 "first argument must be sequence of strings");
722 Py_DECREF(item);
723 return NULL;
724 }
725 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000726 }
727 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
728 return NULL;
729 p = PyString_AsString(res);
730
731 /* optimize for lists. all others (tuples and arbitrary sequences)
732 * just use the abstract interface.
733 */
734 if (PyList_Check(seq)) {
735 for (i = 0; i < seqlen; i++) {
736 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (!PyString_Check(item)){
738 if (PyUnicode_Check(item)) {
739 Py_DECREF(res);
740 return PyUnicode_Join(
741 (PyObject *)self,
742 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000743 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000744 PyErr_Format(PyExc_TypeError,
745 "sequence item %i not a string",
746 i);
747 goto finally;
748 }
749 slen = PyString_GET_SIZE(item);
750 while (reslen + slen + seplen >= sz) {
751 if (_PyString_Resize(&res, sz*2))
752 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000753 sz *= 2;
754 p = PyString_AsString(res) + reslen;
755 }
756 if (i > 0) {
757 memcpy(p, sep, seplen);
758 p += seplen;
759 reslen += seplen;
760 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000761 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 p += slen;
763 reslen += slen;
764 }
765 }
766 else {
767 for (i = 0; i < seqlen; i++) {
768 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000769 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000770 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000771 if (!PyString_Check(item)){
772 if (PyUnicode_Check(item)) {
773 Py_DECREF(res);
774 Py_DECREF(item);
775 return PyUnicode_Join(
776 (PyObject *)self,
777 seq);
778 }
779 Py_DECREF(item);
780 PyErr_Format(PyExc_TypeError,
781 "sequence item %i not a string",
782 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000783 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000784 }
785 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000786 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000787 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000788 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000790 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000791 sz *= 2;
792 p = PyString_AsString(res) + reslen;
793 }
794 if (i > 0) {
795 memcpy(p, sep, seplen);
796 p += seplen;
797 reslen += seplen;
798 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000799 memcpy(p, PyString_AS_STRING(item), slen);
800 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000801 p += slen;
802 reslen += slen;
803 }
804 }
805 if (_PyString_Resize(&res, reslen))
806 goto finally;
807 return res;
808
809 finally:
810 Py_DECREF(res);
811 return NULL;
812}
813
814
815
816static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000818 PyStringObject *self;
819 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000820 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000822 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000823 int len = PyString_GET_SIZE(self);
824 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000825 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000826
Guido van Rossum4c08d552000-03-10 22:55:18 +0000827 if (!PyArg_ParseTuple(args, "O|ii:find/rfind/index/rindex",
828 &subobj, &i, &last))
829 return -2;
830 if (PyString_Check(subobj)) {
831 sub = PyString_AS_STRING(subobj);
832 n = PyString_GET_SIZE(subobj);
833 }
834 else if (PyUnicode_Check(subobj))
835 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
836 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000837 return -2;
838
839 if (last > len)
840 last = len;
841 if (last < 0)
842 last += len;
843 if (last < 0)
844 last = 0;
845 if (i < 0)
846 i += len;
847 if (i < 0)
848 i = 0;
849
Guido van Rossum4c08d552000-03-10 22:55:18 +0000850 if (dir > 0) {
851 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000852 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000853 last -= n;
854 for (; i <= last; ++i)
855 if (s[i] == sub[0] &&
856 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
857 return (long)i;
858 }
859 else {
860 int j;
861
862 if (n == 0 && i <= last)
863 return (long)last;
864 for (j = last-n; j >= i; --j)
865 if (s[j] == sub[0] &&
866 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
867 return (long)j;
868 }
869
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000870 return -1;
871}
872
873
874static char find__doc__[] =
875"S.find(sub [,start [,end]]) -> int\n\
876\n\
877Return the lowest index in S where substring sub is found,\n\
878such that sub is contained within s[start,end]. Optional\n\
879arguments start and end are interpreted as in slice notation.\n\
880\n\
881Return -1 on failure.";
882
883static PyObject *
884string_find(self, args)
885 PyStringObject *self;
886 PyObject *args;
887{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000888 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889 if (result == -2)
890 return NULL;
891 return PyInt_FromLong(result);
892}
893
894
895static char index__doc__[] =
896"S.index(sub [,start [,end]]) -> int\n\
897\n\
898Like S.find() but raise ValueError when the substring is not found.";
899
900static PyObject *
901string_index(self, args)
902 PyStringObject *self;
903 PyObject *args;
904{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000905 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000906 if (result == -2)
907 return NULL;
908 if (result == -1) {
909 PyErr_SetString(PyExc_ValueError,
910 "substring not found in string.index");
911 return NULL;
912 }
913 return PyInt_FromLong(result);
914}
915
916
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000917static char rfind__doc__[] =
918"S.rfind(sub [,start [,end]]) -> int\n\
919\n\
920Return the highest index in S where substring sub is found,\n\
921such that sub is contained within s[start,end]. Optional\n\
922arguments start and end are interpreted as in slice notation.\n\
923\n\
924Return -1 on failure.";
925
926static PyObject *
927string_rfind(self, args)
928 PyStringObject *self;
929 PyObject *args;
930{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000931 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000932 if (result == -2)
933 return NULL;
934 return PyInt_FromLong(result);
935}
936
937
938static char rindex__doc__[] =
939"S.rindex(sub [,start [,end]]) -> int\n\
940\n\
941Like S.rfind() but raise ValueError when the substring is not found.";
942
943static PyObject *
944string_rindex(self, args)
945 PyStringObject *self;
946 PyObject *args;
947{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000949 if (result == -2)
950 return NULL;
951 if (result == -1) {
952 PyErr_SetString(PyExc_ValueError,
953 "substring not found in string.rindex");
954 return NULL;
955 }
956 return PyInt_FromLong(result);
957}
958
959
960static PyObject *
961do_strip(self, args, striptype)
962 PyStringObject *self;
963 PyObject *args;
964 int striptype;
965{
966 char *s = PyString_AS_STRING(self);
967 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000968
Guido van Rossum43713e52000-02-29 13:59:29 +0000969 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000970 return NULL;
971
972 i = 0;
973 if (striptype != RIGHTSTRIP) {
974 while (i < len && isspace(Py_CHARMASK(s[i]))) {
975 i++;
976 }
977 }
978
979 j = len;
980 if (striptype != LEFTSTRIP) {
981 do {
982 j--;
983 } while (j >= i && isspace(Py_CHARMASK(s[j])));
984 j++;
985 }
986
987 if (i == 0 && j == len) {
988 Py_INCREF(self);
989 return (PyObject*)self;
990 }
991 else
992 return PyString_FromStringAndSize(s+i, j-i);
993}
994
995
996static char strip__doc__[] =
997"S.strip() -> string\n\
998\n\
999Return a copy of the string S with leading and trailing\n\
1000whitespace removed.";
1001
1002static PyObject *
1003string_strip(self, args)
1004 PyStringObject *self;
1005 PyObject *args;
1006{
1007 return do_strip(self, args, BOTHSTRIP);
1008}
1009
1010
1011static char lstrip__doc__[] =
1012"S.lstrip() -> string\n\
1013\n\
1014Return a copy of the string S with leading whitespace removed.";
1015
1016static PyObject *
1017string_lstrip(self, args)
1018 PyStringObject *self;
1019 PyObject *args;
1020{
1021 return do_strip(self, args, LEFTSTRIP);
1022}
1023
1024
1025static char rstrip__doc__[] =
1026"S.rstrip() -> string\n\
1027\n\
1028Return a copy of the string S with trailing whitespace removed.";
1029
1030static PyObject *
1031string_rstrip(self, args)
1032 PyStringObject *self;
1033 PyObject *args;
1034{
1035 return do_strip(self, args, RIGHTSTRIP);
1036}
1037
1038
1039static char lower__doc__[] =
1040"S.lower() -> string\n\
1041\n\
1042Return a copy of the string S converted to lowercase.";
1043
1044static PyObject *
1045string_lower(self, args)
1046 PyStringObject *self;
1047 PyObject *args;
1048{
1049 char *s = PyString_AS_STRING(self), *s_new;
1050 int i, n = PyString_GET_SIZE(self);
1051 PyObject *new;
1052
Guido van Rossum43713e52000-02-29 13:59:29 +00001053 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054 return NULL;
1055 new = PyString_FromStringAndSize(NULL, n);
1056 if (new == NULL)
1057 return NULL;
1058 s_new = PyString_AsString(new);
1059 for (i = 0; i < n; i++) {
1060 int c = Py_CHARMASK(*s++);
1061 if (isupper(c)) {
1062 *s_new = tolower(c);
1063 } else
1064 *s_new = c;
1065 s_new++;
1066 }
1067 return new;
1068}
1069
1070
1071static char upper__doc__[] =
1072"S.upper() -> string\n\
1073\n\
1074Return a copy of the string S converted to uppercase.";
1075
1076static PyObject *
1077string_upper(self, args)
1078 PyStringObject *self;
1079 PyObject *args;
1080{
1081 char *s = PyString_AS_STRING(self), *s_new;
1082 int i, n = PyString_GET_SIZE(self);
1083 PyObject *new;
1084
Guido van Rossum43713e52000-02-29 13:59:29 +00001085 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086 return NULL;
1087 new = PyString_FromStringAndSize(NULL, n);
1088 if (new == NULL)
1089 return NULL;
1090 s_new = PyString_AsString(new);
1091 for (i = 0; i < n; i++) {
1092 int c = Py_CHARMASK(*s++);
1093 if (islower(c)) {
1094 *s_new = toupper(c);
1095 } else
1096 *s_new = c;
1097 s_new++;
1098 }
1099 return new;
1100}
1101
1102
Guido van Rossum4c08d552000-03-10 22:55:18 +00001103static char title__doc__[] =
1104"S.title() -> string\n\
1105\n\
1106Return a titlecased version of S, i.e. words start with uppercase\n\
1107characters, all remaining cased characters have lowercase.";
1108
1109static PyObject*
1110string_title(PyUnicodeObject *self, PyObject *args)
1111{
1112 char *s = PyString_AS_STRING(self), *s_new;
1113 int i, n = PyString_GET_SIZE(self);
1114 int previous_is_cased = 0;
1115 PyObject *new;
1116
1117 if (!PyArg_ParseTuple(args, ":title"))
1118 return NULL;
1119 new = PyString_FromStringAndSize(NULL, n);
1120 if (new == NULL)
1121 return NULL;
1122 s_new = PyString_AsString(new);
1123 for (i = 0; i < n; i++) {
1124 int c = Py_CHARMASK(*s++);
1125 if (islower(c)) {
1126 if (!previous_is_cased)
1127 c = toupper(c);
1128 previous_is_cased = 1;
1129 } else if (isupper(c)) {
1130 if (previous_is_cased)
1131 c = tolower(c);
1132 previous_is_cased = 1;
1133 } else
1134 previous_is_cased = 0;
1135 *s_new++ = c;
1136 }
1137 return new;
1138}
1139
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140static char capitalize__doc__[] =
1141"S.capitalize() -> string\n\
1142\n\
1143Return a copy of the string S with only its first character\n\
1144capitalized.";
1145
1146static PyObject *
1147string_capitalize(self, args)
1148 PyStringObject *self;
1149 PyObject *args;
1150{
1151 char *s = PyString_AS_STRING(self), *s_new;
1152 int i, n = PyString_GET_SIZE(self);
1153 PyObject *new;
1154
Guido van Rossum43713e52000-02-29 13:59:29 +00001155 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001156 return NULL;
1157 new = PyString_FromStringAndSize(NULL, n);
1158 if (new == NULL)
1159 return NULL;
1160 s_new = PyString_AsString(new);
1161 if (0 < n) {
1162 int c = Py_CHARMASK(*s++);
1163 if (islower(c))
1164 *s_new = toupper(c);
1165 else
1166 *s_new = c;
1167 s_new++;
1168 }
1169 for (i = 1; i < n; i++) {
1170 int c = Py_CHARMASK(*s++);
1171 if (isupper(c))
1172 *s_new = tolower(c);
1173 else
1174 *s_new = c;
1175 s_new++;
1176 }
1177 return new;
1178}
1179
1180
1181static char count__doc__[] =
1182"S.count(sub[, start[, end]]) -> int\n\
1183\n\
1184Return the number of occurrences of substring sub in string\n\
1185S[start:end]. Optional arguments start and end are\n\
1186interpreted as in slice notation.";
1187
1188static PyObject *
1189string_count(self, args)
1190 PyStringObject *self;
1191 PyObject *args;
1192{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001193 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001194 int len = PyString_GET_SIZE(self), n;
1195 int i = 0, last = INT_MAX;
1196 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001197 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001198
Guido van Rossum4c08d552000-03-10 22:55:18 +00001199 if (!PyArg_ParseTuple(args, "O|ii:count", &subobj, &i, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001200 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001201 if (PyString_Check(subobj)) {
1202 sub = PyString_AS_STRING(subobj);
1203 n = PyString_GET_SIZE(subobj);
1204 }
1205 else if (PyUnicode_Check(subobj))
1206 return PyInt_FromLong(
1207 PyUnicode_Count((PyObject *)self, subobj, i, last));
1208 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1209 return NULL;
1210
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001211 if (last > len)
1212 last = len;
1213 if (last < 0)
1214 last += len;
1215 if (last < 0)
1216 last = 0;
1217 if (i < 0)
1218 i += len;
1219 if (i < 0)
1220 i = 0;
1221 m = last + 1 - n;
1222 if (n == 0)
1223 return PyInt_FromLong((long) (m-i));
1224
1225 r = 0;
1226 while (i < m) {
1227 if (!memcmp(s+i, sub, n)) {
1228 r++;
1229 i += n;
1230 } else {
1231 i++;
1232 }
1233 }
1234 return PyInt_FromLong((long) r);
1235}
1236
1237
1238static char swapcase__doc__[] =
1239"S.swapcase() -> string\n\
1240\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001241Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242converted to lowercase and vice versa.";
1243
1244static PyObject *
1245string_swapcase(self, args)
1246 PyStringObject *self;
1247 PyObject *args;
1248{
1249 char *s = PyString_AS_STRING(self), *s_new;
1250 int i, n = PyString_GET_SIZE(self);
1251 PyObject *new;
1252
Guido van Rossum43713e52000-02-29 13:59:29 +00001253 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001254 return NULL;
1255 new = PyString_FromStringAndSize(NULL, n);
1256 if (new == NULL)
1257 return NULL;
1258 s_new = PyString_AsString(new);
1259 for (i = 0; i < n; i++) {
1260 int c = Py_CHARMASK(*s++);
1261 if (islower(c)) {
1262 *s_new = toupper(c);
1263 }
1264 else if (isupper(c)) {
1265 *s_new = tolower(c);
1266 }
1267 else
1268 *s_new = c;
1269 s_new++;
1270 }
1271 return new;
1272}
1273
1274
1275static char translate__doc__[] =
1276"S.translate(table [,deletechars]) -> string\n\
1277\n\
1278Return a copy of the string S, where all characters occurring\n\
1279in the optional argument deletechars are removed, and the\n\
1280remaining characters have been mapped through the given\n\
1281translation table, which must be a string of length 256.";
1282
1283static PyObject *
1284string_translate(self, args)
1285 PyStringObject *self;
1286 PyObject *args;
1287{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001288 register char *input, *output;
1289 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 register int i, c, changed = 0;
1291 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293 int inlen, tablen, dellen = 0;
1294 PyObject *result;
1295 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001297
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 if (!PyArg_ParseTuple(args, "O|O:translate",
1299 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301
1302 if (PyString_Check(tableobj)) {
1303 table1 = PyString_AS_STRING(tableobj);
1304 tablen = PyString_GET_SIZE(tableobj);
1305 }
1306 else if (PyUnicode_Check(tableobj)) {
1307 /* Unicode .translate() does not support the deletechars
1308 parameter; instead a mapping to None will cause characters
1309 to be deleted. */
1310 if (delobj != NULL) {
1311 PyErr_SetString(PyExc_TypeError,
1312 "deletions are implemented differently for unicode");
1313 return NULL;
1314 }
1315 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1316 }
1317 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319
1320 if (delobj != NULL) {
1321 if (PyString_Check(delobj)) {
1322 del_table = PyString_AS_STRING(delobj);
1323 dellen = PyString_GET_SIZE(delobj);
1324 }
1325 else if (PyUnicode_Check(delobj)) {
1326 PyErr_SetString(PyExc_TypeError,
1327 "deletions are implemented differently for unicode");
1328 return NULL;
1329 }
1330 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1331 return NULL;
1332
1333 if (tablen != 256) {
1334 PyErr_SetString(PyExc_ValueError,
1335 "translation table must be 256 characters long");
1336 return NULL;
1337 }
1338 }
1339 else {
1340 del_table = NULL;
1341 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342 }
1343
1344 table = table1;
1345 inlen = PyString_Size(input_obj);
1346 result = PyString_FromStringAndSize((char *)NULL, inlen);
1347 if (result == NULL)
1348 return NULL;
1349 output_start = output = PyString_AsString(result);
1350 input = PyString_AsString(input_obj);
1351
1352 if (dellen == 0) {
1353 /* If no deletions are required, use faster code */
1354 for (i = inlen; --i >= 0; ) {
1355 c = Py_CHARMASK(*input++);
1356 if (Py_CHARMASK((*output++ = table[c])) != c)
1357 changed = 1;
1358 }
1359 if (changed)
1360 return result;
1361 Py_DECREF(result);
1362 Py_INCREF(input_obj);
1363 return input_obj;
1364 }
1365
1366 for (i = 0; i < 256; i++)
1367 trans_table[i] = Py_CHARMASK(table[i]);
1368
1369 for (i = 0; i < dellen; i++)
1370 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1371
1372 for (i = inlen; --i >= 0; ) {
1373 c = Py_CHARMASK(*input++);
1374 if (trans_table[c] != -1)
1375 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1376 continue;
1377 changed = 1;
1378 }
1379 if (!changed) {
1380 Py_DECREF(result);
1381 Py_INCREF(input_obj);
1382 return input_obj;
1383 }
1384 /* Fix the size of the resulting string */
1385 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1386 return NULL;
1387 return result;
1388}
1389
1390
1391/* What follows is used for implementing replace(). Perry Stoll. */
1392
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM of the
  contents of memory pointed to by PAT.  Returns the index into MEM if
  found, or -1 if not found.  If len of PAT is greater than length of
  MEM, or PAT is empty (pat_len <= 0), the function returns -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    register int ii;

    /* An empty pattern has no first byte to compare and would make
       the memcmp length below negative; report "not found". */
    if (pat_len <= 0)
        return -1;

    /* pattern can not occur in the last pat_len-1 chars */
    len -= pat_len;

    for (ii = 0; ii <= len; ii++) {
        if (mem[ii] == pat[0] &&
            (pat_len == 1 ||
             memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
            return ii;
        }
    }
    return -1;
}

/*
  mymemcnt

  Return the number of distinct (non-overlapping) times PAT is found
  in MEM.
  meaning mem=1111 and pat==11 returns 2.
          mem=11111 and pat==11 also return 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    register int offset = 0;
    int nfound = 0;

    while (len >= 0) {
        offset = mymemfind(mem, len, pat, pat_len);
        if (offset == -1)
            break;
        /* continue searching right after this match */
        mem += offset + pat_len;
        len -= offset + pat_len;
        nfound++;
    }
    return nfound;
}

/*
  mymemreplace

  Return a string in which all occurrences of PAT in memory STR are
  replaced with SUB.  If COUNT is non-negative, at most COUNT
  occurrences (the leftmost ones) are replaced.

  If STR is empty, PAT is empty, length of PAT is greater than length
  of STR, or there are no occurrences of PAT in STR, then the original
  string is returned.  Otherwise, a new string is allocated here with
  malloc() and returned; the caller must free() it.  The result is not
  NUL-terminated.

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs (no memory).

  return value is:
      the new string allocated locally, or
      STR itself when out_len is -1, or
      NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len <= 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Refuse a result whose length does not fit in an int; the
       caller reports NULL as an out-of-memory condition. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;
    new_len = len + nfound*(sub_len - pat_len);

    /* malloc(0) is allowed to return NULL, which would be mistaken
       for an allocation failure when every byte gets replaced by the
       empty string; always request at least one byte. */
    new_s = (char *)malloc(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    /* The input is handed back untouched; the cast only restores the
       historical non-const return type. */
    return (char *)str;
}
1535
1536
1537static char replace__doc__[] =
1538"S.replace (old, new[, maxsplit]) -> string\n\
1539\n\
1540Return a copy of string S with all occurrences of substring\n\
1541old replaced by new. If the optional argument maxsplit is\n\
1542given, only the first maxsplit occurrences are replaced.";
1543
1544static PyObject *
1545string_replace(self, args)
1546 PyStringObject *self;
1547 PyObject *args;
1548{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 const char *str = PyString_AS_STRING(self), *sub, *repl;
1550 char *new_s;
1551 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1552 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555
Guido van Rossum4c08d552000-03-10 22:55:18 +00001556 if (!PyArg_ParseTuple(args, "OO|i:replace",
1557 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001558 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001559
1560 if (PyString_Check(subobj)) {
1561 sub = PyString_AS_STRING(subobj);
1562 sub_len = PyString_GET_SIZE(subobj);
1563 }
1564 else if (PyUnicode_Check(subobj))
1565 return PyUnicode_Replace((PyObject *)self,
1566 subobj, replobj, count);
1567 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1568 return NULL;
1569
1570 if (PyString_Check(replobj)) {
1571 repl = PyString_AS_STRING(replobj);
1572 repl_len = PyString_GET_SIZE(replobj);
1573 }
1574 else if (PyUnicode_Check(replobj))
1575 return PyUnicode_Replace((PyObject *)self,
1576 subobj, replobj, count);
1577 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1578 return NULL;
1579
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001580 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001581 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582 return NULL;
1583 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 if (new_s == NULL) {
1586 PyErr_NoMemory();
1587 return NULL;
1588 }
1589 if (out_len == -1) {
1590 /* we're returning another reference to self */
1591 new = (PyObject*)self;
1592 Py_INCREF(new);
1593 }
1594 else {
1595 new = PyString_FromStringAndSize(new_s, out_len);
1596 free(new_s);
1597 }
1598 return new;
1599}
1600
1601
1602static char startswith__doc__[] =
1603"S.startswith(prefix[, start[, end]]) -> int\n\
1604\n\
1605Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1606optional start, test S beginning at that position. With optional end, stop\n\
1607comparing S at that position.";
1608
1609static PyObject *
1610string_startswith(self, args)
1611 PyStringObject *self;
1612 PyObject *args;
1613{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 int plen;
1618 int start = 0;
1619 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001620 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622 if (!PyArg_ParseTuple(args, "O|ii:startswith", &subobj, &start, &end))
1623 return NULL;
1624 if (PyString_Check(subobj)) {
1625 prefix = PyString_AS_STRING(subobj);
1626 plen = PyString_GET_SIZE(subobj);
1627 }
1628 else if (PyUnicode_Check(subobj))
1629 return PyInt_FromLong(
1630 PyUnicode_Tailmatch((PyObject *)self,
1631 subobj, start, end, -1));
1632 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633 return NULL;
1634
1635 /* adopt Java semantics for index out of range. it is legal for
1636 * offset to be == plen, but this only returns true if prefix is
1637 * the empty string.
1638 */
1639 if (start < 0 || start+plen > len)
1640 return PyInt_FromLong(0);
1641
1642 if (!memcmp(str+start, prefix, plen)) {
1643 /* did the match end after the specified end? */
1644 if (end < 0)
1645 return PyInt_FromLong(1);
1646 else if (end - start < plen)
1647 return PyInt_FromLong(0);
1648 else
1649 return PyInt_FromLong(1);
1650 }
1651 else return PyInt_FromLong(0);
1652}
1653
1654
1655static char endswith__doc__[] =
1656"S.endswith(suffix[, start[, end]]) -> int\n\
1657\n\
1658Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1659optional start, test S beginning at that position. With optional end, stop\n\
1660comparing S at that position.";
1661
1662static PyObject *
1663string_endswith(self, args)
1664 PyStringObject *self;
1665 PyObject *args;
1666{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001667 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001669 const char* suffix;
1670 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671 int start = 0;
1672 int end = -1;
1673 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675
Guido van Rossum4c08d552000-03-10 22:55:18 +00001676 if (!PyArg_ParseTuple(args, "O|ii:endswith", &subobj, &start, &end))
1677 return NULL;
1678 if (PyString_Check(subobj)) {
1679 suffix = PyString_AS_STRING(subobj);
1680 slen = PyString_GET_SIZE(subobj);
1681 }
1682 else if (PyUnicode_Check(subobj))
1683 return PyInt_FromLong(
1684 PyUnicode_Tailmatch((PyObject *)self,
1685 subobj, start, end, +1));
1686 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687 return NULL;
1688
Guido van Rossum4c08d552000-03-10 22:55:18 +00001689 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690 return PyInt_FromLong(0);
1691
1692 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694
Guido van Rossum4c08d552000-03-10 22:55:18 +00001695 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696 return PyInt_FromLong(1);
1697 else return PyInt_FromLong(0);
1698}
1699
1700
Guido van Rossum4c08d552000-03-10 22:55:18 +00001701static char expandtabs__doc__[] =
1702"S.expandtabs([tabsize]) -> string\n\
1703\n\
1704Return a copy of S where all tab characters are expanded using spaces.\n\
1705If tabsize is not given, a tab size of 8 characters is assumed.";
1706
1707static PyObject*
1708string_expandtabs(PyStringObject *self, PyObject *args)
1709{
1710 const char *e, *p;
1711 char *q;
1712 int i, j;
1713 PyObject *u;
1714 int tabsize = 8;
1715
1716 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1717 return NULL;
1718
1719 /* First pass: determine size of ouput string */
1720 i = j = 0;
1721 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1722 for (p = PyString_AS_STRING(self); p < e; p++)
1723 if (*p == '\t') {
1724 if (tabsize > 0)
1725 j += tabsize - (j % tabsize);
1726 }
1727 else {
1728 j++;
1729 if (*p == '\n' || *p == '\r') {
1730 i += j;
1731 j = 0;
1732 }
1733 }
1734
1735 /* Second pass: create output string and fill it */
1736 u = PyString_FromStringAndSize(NULL, i + j);
1737 if (!u)
1738 return NULL;
1739
1740 j = 0;
1741 q = PyString_AS_STRING(u);
1742
1743 for (p = PyString_AS_STRING(self); p < e; p++)
1744 if (*p == '\t') {
1745 if (tabsize > 0) {
1746 i = tabsize - (j % tabsize);
1747 j += i;
1748 while (i--)
1749 *q++ = ' ';
1750 }
1751 }
1752 else {
1753 j++;
1754 *q++ = *p;
1755 if (*p == '\n' || *p == '\r')
1756 j = 0;
1757 }
1758
1759 return u;
1760}
1761
1762static
1763PyObject *pad(PyStringObject *self,
1764 int left,
1765 int right,
1766 char fill)
1767{
1768 PyObject *u;
1769
1770 if (left < 0)
1771 left = 0;
1772 if (right < 0)
1773 right = 0;
1774
1775 if (left == 0 && right == 0) {
1776 Py_INCREF(self);
1777 return (PyObject *)self;
1778 }
1779
1780 u = PyString_FromStringAndSize(NULL,
1781 left + PyString_GET_SIZE(self) + right);
1782 if (u) {
1783 if (left)
1784 memset(PyString_AS_STRING(u), fill, left);
1785 memcpy(PyString_AS_STRING(u) + left,
1786 PyString_AS_STRING(self),
1787 PyString_GET_SIZE(self));
1788 if (right)
1789 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1790 fill, right);
1791 }
1792
1793 return u;
1794}
1795
1796static char ljust__doc__[] =
1797"S.ljust(width) -> string\n\
1798\n\
1799Return S left justified in a string of length width. Padding is\n\
1800done using spaces.";
1801
1802static PyObject *
1803string_ljust(PyStringObject *self, PyObject *args)
1804{
1805 int width;
1806 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1807 return NULL;
1808
1809 if (PyString_GET_SIZE(self) >= width) {
1810 Py_INCREF(self);
1811 return (PyObject*) self;
1812 }
1813
1814 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1815}
1816
1817
1818static char rjust__doc__[] =
1819"S.rjust(width) -> string\n\
1820\n\
1821Return S right justified in a string of length width. Padding is\n\
1822done using spaces.";
1823
1824static PyObject *
1825string_rjust(PyStringObject *self, PyObject *args)
1826{
1827 int width;
1828 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1829 return NULL;
1830
1831 if (PyString_GET_SIZE(self) >= width) {
1832 Py_INCREF(self);
1833 return (PyObject*) self;
1834 }
1835
1836 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1837}
1838
1839
1840static char center__doc__[] =
1841"S.center(width) -> string\n\
1842\n\
1843Return S centered in a string of length width. Padding is done\n\
1844using spaces.";
1845
1846static PyObject *
1847string_center(PyStringObject *self, PyObject *args)
1848{
1849 int marg, left;
1850 int width;
1851
1852 if (!PyArg_ParseTuple(args, "i:center", &width))
1853 return NULL;
1854
1855 if (PyString_GET_SIZE(self) >= width) {
1856 Py_INCREF(self);
1857 return (PyObject*) self;
1858 }
1859
1860 marg = width - PyString_GET_SIZE(self);
1861 left = marg / 2 + (marg & width & 1);
1862
1863 return pad(self, left, marg - left, ' ');
1864}
1865
#if 0
/* Currently compiled out; the matching method table entry is
   disabled the same way. */
static char zfill__doc__[] =
    "S.zfill(width) -> string\n"
    "\n"
    "Pad a numeric string x with zeros on the left, to fill a field\n"
    "of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width): left-pad with '0' characters.  A
 * leading '+' or '-' of the original string is moved in front of the
 * inserted zeros.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int nzeros;
    PyObject *padded;
    char *text;
    char first;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    nzeros = width - PyString_GET_SIZE(self);

    padded = pad(self, nzeros, 0, '0');
    if (padded == NULL)
        return NULL;

    text = PyString_AS_STRING(padded);
    first = text[nzeros];   /* first character of the original string */
    if (first == '+' || first == '-') {
        /* move sign to beginning of string */
        text[0] = first;
        text[nzeros] = '0';
    }

    return padded;
}
#endif
1905
1906static char isspace__doc__[] =
1907"S.isspace() -> int\n\
1908\n\
1909Return 1 if there are only whitespace characters in S,\n\
19100 otherwise.";
1911
1912static PyObject*
1913string_isspace(PyStringObject *self, PyObject *args)
1914{
1915 register const char *p = PyString_AS_STRING(self);
1916 register const char *e;
1917
1918 if (!PyArg_NoArgs(args))
1919 return NULL;
1920
1921 /* Shortcut for single character strings */
1922 if (PyString_GET_SIZE(self) == 1 &&
1923 isspace(*p))
1924 return PyInt_FromLong(1);
1925
1926 e = p + PyString_GET_SIZE(self);
1927 for (; p < e; p++) {
1928 if (!isspace(*p))
1929 return PyInt_FromLong(0);
1930 }
1931 return PyInt_FromLong(1);
1932}
1933
1934
1935static char isdigit__doc__[] =
1936"S.isdigit() -> int\n\
1937\n\
1938Return 1 if there are only digit characters in S,\n\
19390 otherwise.";
1940
1941static PyObject*
1942string_isdigit(PyStringObject *self, PyObject *args)
1943{
1944 register const char *p = PyString_AS_STRING(self);
1945 register const char *e;
1946
1947 if (!PyArg_NoArgs(args))
1948 return NULL;
1949
1950 /* Shortcut for single character strings */
1951 if (PyString_GET_SIZE(self) == 1 &&
1952 isdigit(*p))
1953 return PyInt_FromLong(1);
1954
1955 e = p + PyString_GET_SIZE(self);
1956 for (; p < e; p++) {
1957 if (!isdigit(*p))
1958 return PyInt_FromLong(0);
1959 }
1960 return PyInt_FromLong(1);
1961}
1962
1963
1964static char islower__doc__[] =
1965"S.islower() -> int\n\
1966\n\
1967Return 1 if all cased characters in S are lowercase and there is\n\
1968at least one cased character in S, 0 otherwise.";
1969
1970static PyObject*
1971string_islower(PyStringObject *self, PyObject *args)
1972{
1973 register const char *p = PyString_AS_STRING(self);
1974 register const char *e;
1975 int cased;
1976
1977 if (!PyArg_NoArgs(args))
1978 return NULL;
1979
1980 /* Shortcut for single character strings */
1981 if (PyString_GET_SIZE(self) == 1)
1982 return PyInt_FromLong(islower(*p) != 0);
1983
1984 e = p + PyString_GET_SIZE(self);
1985 cased = 0;
1986 for (; p < e; p++) {
1987 if (isupper(*p))
1988 return PyInt_FromLong(0);
1989 else if (!cased && islower(*p))
1990 cased = 1;
1991 }
1992 return PyInt_FromLong(cased);
1993}
1994
1995
1996static char isupper__doc__[] =
1997"S.isupper() -> int\n\
1998\n\
1999Return 1 if all cased characters in S are uppercase and there is\n\
2000at least one cased character in S, 0 otherwise.";
2001
2002static PyObject*
2003string_isupper(PyStringObject *self, PyObject *args)
2004{
2005 register const char *p = PyString_AS_STRING(self);
2006 register const char *e;
2007 int cased;
2008
2009 if (!PyArg_NoArgs(args))
2010 return NULL;
2011
2012 /* Shortcut for single character strings */
2013 if (PyString_GET_SIZE(self) == 1)
2014 return PyInt_FromLong(isupper(*p) != 0);
2015
2016 e = p + PyString_GET_SIZE(self);
2017 cased = 0;
2018 for (; p < e; p++) {
2019 if (islower(*p))
2020 return PyInt_FromLong(0);
2021 else if (!cased && isupper(*p))
2022 cased = 1;
2023 }
2024 return PyInt_FromLong(cased);
2025}
2026
2027
2028static char istitle__doc__[] =
2029"S.istitle() -> int\n\
2030\n\
2031Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2032may only follow uncased characters and lowercase characters only cased\n\
2033ones. Return 0 otherwise.";
2034
2035static PyObject*
2036string_istitle(PyStringObject *self, PyObject *args)
2037{
2038 register const char *p = PyString_AS_STRING(self);
2039 register const char *e;
2040 int cased, previous_is_cased;
2041
2042 if (!PyArg_NoArgs(args))
2043 return NULL;
2044
2045 /* Shortcut for single character strings */
2046 if (PyString_GET_SIZE(self) == 1)
2047 return PyInt_FromLong(isupper(*p) != 0);
2048
2049 e = p + PyString_GET_SIZE(self);
2050 cased = 0;
2051 previous_is_cased = 0;
2052 for (; p < e; p++) {
2053 register const char ch = *p;
2054
2055 if (isupper(ch)) {
2056 if (previous_is_cased)
2057 return PyInt_FromLong(0);
2058 previous_is_cased = 1;
2059 cased = 1;
2060 }
2061 else if (islower(ch)) {
2062 if (!previous_is_cased)
2063 return PyInt_FromLong(0);
2064 previous_is_cased = 1;
2065 cased = 1;
2066 }
2067 else
2068 previous_is_cased = 0;
2069 }
2070 return PyInt_FromLong(cased);
2071}
2072
2073
/* Doc string of S.splitlines(); the optional argument is shown in
   balanced brackets. */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080
2081#define SPLIT_APPEND(data, left, right) \
2082 str = PyString_FromStringAndSize(data + left, right - left); \
2083 if (!str) \
2084 goto onError; \
2085 if (PyList_Append(list, str)) { \
2086 Py_DECREF(str); \
2087 goto onError; \
2088 } \
2089 else \
2090 Py_DECREF(str);
2091
2092static PyObject*
2093string_splitlines(PyStringObject *self, PyObject *args)
2094{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002095 register int i;
2096 register int j;
2097 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002098 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099 PyObject *list;
2100 PyObject *str;
2101 char *data;
2102
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002103 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002104 return NULL;
2105
2106 data = PyString_AS_STRING(self);
2107 len = PyString_GET_SIZE(self);
2108
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 list = PyList_New(0);
2110 if (!list)
2111 goto onError;
2112
2113 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002114 int eol;
2115
Guido van Rossum4c08d552000-03-10 22:55:18 +00002116 /* Find a line and append it */
2117 while (i < len && data[i] != '\n' && data[i] != '\r')
2118 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002119
2120 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002121 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002122 if (i < len) {
2123 if (data[i] == '\r' && i + 1 < len &&
2124 data[i+1] == '\n')
2125 i += 2;
2126 else
2127 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002128 if (keepends)
2129 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002131 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002132 j = i;
2133 }
2134 if (j < len) {
2135 SPLIT_APPEND(data, j, len);
2136 }
2137
2138 return list;
2139
2140 onError:
2141 Py_DECREF(list);
2142 return NULL;
2143}
2144
2145#undef SPLIT_APPEND
2146
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147
2148static PyMethodDef
2149string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150 /* Counterparts of the obsolete stropmodule functions; except
2151 string.maketrans(). */
2152 {"join", (PyCFunction)string_join, 1, join__doc__},
2153 {"split", (PyCFunction)string_split, 1, split__doc__},
2154 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2155 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2156 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2157 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2158 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2159 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2160 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2162 {"count", (PyCFunction)string_count, 1, count__doc__},
2163 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2164 {"find", (PyCFunction)string_find, 1, find__doc__},
2165 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2168 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2169 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2170 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2172 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2173 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2175 {"title", (PyCFunction)string_title, 1, title__doc__},
2176 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2177 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2178 {"center", (PyCFunction)string_center, 1, center__doc__},
2179 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2180 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2181#if 0
2182 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2183#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 {NULL, NULL} /* sentinel */
2185};
2186
2187static PyObject *
2188string_getattr(s, name)
2189 PyStringObject *s;
2190 char *name;
2191{
2192 return Py_FindMethod(string_methods, (PyObject*)s, name);
2193}
2194
2195
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002196PyTypeObject PyString_Type = {
2197 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002198 0,
2199 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002200 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002201 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002202 (destructor)string_dealloc, /*tp_dealloc*/
2203 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002205 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002206 (cmpfunc)string_compare, /*tp_compare*/
2207 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002208 0, /*tp_as_number*/
2209 &string_as_sequence, /*tp_as_sequence*/
2210 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002211 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002212 0, /*tp_call*/
2213 0, /*tp_str*/
2214 0, /*tp_getattro*/
2215 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002216 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002217 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002218 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002219};
2220
2221void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002222PyString_Concat(pv, w)
2223 register PyObject **pv;
2224 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002225{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002226 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002227 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002228 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002229 if (w == NULL || !PyString_Check(*pv)) {
2230 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002231 *pv = NULL;
2232 return;
2233 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002234 v = string_concat((PyStringObject *) *pv, w);
2235 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002236 *pv = v;
2237}
2238
Guido van Rossum013142a1994-08-30 08:19:36 +00002239void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002240PyString_ConcatAndDel(pv, w)
2241 register PyObject **pv;
2242 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002243{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002244 PyString_Concat(pv, w);
2245 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002246}
2247
2248
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002249/* The following function breaks the notion that strings are immutable:
2250 it changes the size of a string. We get away with this only if there
2251 is only one module referencing the object. You can also think of it
2252 as creating a new string object and destroying the old one, only
2253 more efficiently. In any case, don't use this if the string may
2254 already be known to some other part of the code... */
2255
2256int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002257_PyString_Resize(pv, newsize)
2258 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002259 int newsize;
2260{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002261 register PyObject *v;
2262 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002263 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002264 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002265 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002266 Py_DECREF(v);
2267 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002268 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002269 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002270 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002271#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002272 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002273#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002274 _Py_ForgetReference(v);
2275 *pv = (PyObject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002276 realloc((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002277 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002278 if (*pv == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002279 PyMem_DEL(v);
2280 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002281 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002282 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002283 _Py_NewReference(*pv);
2284 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002285 sv->ob_size = newsize;
2286 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002287 return 0;
2288}
Guido van Rossume5372401993-03-16 12:15:04 +00002289
2290/* Helpers for formatstring */
2291
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002292static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002293getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002294 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002295 int arglen;
2296 int *p_argidx;
2297{
2298 int argidx = *p_argidx;
2299 if (argidx < arglen) {
2300 (*p_argidx)++;
2301 if (arglen < 0)
2302 return args;
2303 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002304 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002305 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002306 PyErr_SetString(PyExc_TypeError,
2307 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002308 return NULL;
2309}
2310
/* Bit flags collected from the '-', '+', ' ', '#' and '0' characters
   of a format specifier. */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2316
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002317static int
2318formatfloat(buf, flags, prec, type, v)
2319 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002320 int flags;
2321 int prec;
2322 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002323 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002324{
2325 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002326 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002327 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002328 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002329 if (prec < 0)
2330 prec = 6;
2331 if (prec > 50)
2332 prec = 50; /* Arbitrary limitation */
2333 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2334 type = 'g';
2335 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2336 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002337 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002338}
2339
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002340static int
2341formatint(buf, flags, prec, type, v)
2342 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002343 int flags;
2344 int prec;
2345 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002346 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002347{
2348 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002349 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002350 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002351 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002352 if (prec < 0)
2353 prec = 1;
2354 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2355 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002356 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002357}
2358
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002359static int
2360formatchar(buf, v)
2361 char *buf;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002362 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002363{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002364 if (PyString_Check(v)) {
2365 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002366 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002367 }
2368 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002369 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002370 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002371 }
2372 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002373 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002374}
2375
Guido van Rossum013142a1994-08-30 08:19:36 +00002376
Guido van Rossume5372401993-03-16 12:15:04 +00002377/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
2378
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002379PyObject *
2380PyString_Format(format, args)
2381 PyObject *format;
2382 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002383{
2384 char *fmt, *res;
2385 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002386 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002387 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002388 PyObject *dict = NULL;
2389 if (format == NULL || !PyString_Check(format) || args == NULL) {
2390 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002391 return NULL;
2392 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002393 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002394 fmt = PyString_AsString(format);
2395 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002396 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002397 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002398 if (result == NULL)
2399 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002400 res = PyString_AsString(result);
2401 if (PyTuple_Check(args)) {
2402 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002403 argidx = 0;
2404 }
2405 else {
2406 arglen = -1;
2407 argidx = -2;
2408 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002409 if (args->ob_type->tp_as_mapping)
2410 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002411 while (--fmtcnt >= 0) {
2412 if (*fmt != '%') {
2413 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002414 rescnt = fmtcnt + 100;
2415 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002416 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002417 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002418 res = PyString_AsString(result)
2419 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002420 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002421 }
2422 *res++ = *fmt++;
2423 }
2424 else {
2425 /* Got a format specifier */
2426 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002427 int width = -1;
2428 int prec = -1;
2429 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002430 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002431 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002432 PyObject *v = NULL;
2433 PyObject *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +00002434 char *buf;
2435 int sign;
2436 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002437 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002438 char *fmt_start = fmt;
2439
Guido van Rossumda9c2711996-12-05 21:58:58 +00002440 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002441 if (*fmt == '(') {
2442 char *keystart;
2443 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002444 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002445 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002446
2447 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002448 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002449 "format requires a mapping");
2450 goto error;
2451 }
2452 ++fmt;
2453 --fmtcnt;
2454 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002455 /* Skip over balanced parentheses */
2456 while (pcount > 0 && --fmtcnt >= 0) {
2457 if (*fmt == ')')
2458 --pcount;
2459 else if (*fmt == '(')
2460 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002461 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002462 }
2463 keylen = fmt - keystart - 1;
2464 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002465 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002466 "incomplete format key");
2467 goto error;
2468 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002469 key = PyString_FromStringAndSize(keystart,
2470 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002471 if (key == NULL)
2472 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002473 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002474 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002475 args_owned = 0;
2476 }
2477 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002478 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002479 if (args == NULL) {
2480 goto error;
2481 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002482 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002483 arglen = -1;
2484 argidx = -2;
2485 }
Guido van Rossume5372401993-03-16 12:15:04 +00002486 while (--fmtcnt >= 0) {
2487 switch (c = *fmt++) {
2488 case '-': flags |= F_LJUST; continue;
2489 case '+': flags |= F_SIGN; continue;
2490 case ' ': flags |= F_BLANK; continue;
2491 case '#': flags |= F_ALT; continue;
2492 case '0': flags |= F_ZERO; continue;
2493 }
2494 break;
2495 }
2496 if (c == '*') {
2497 v = getnextarg(args, arglen, &argidx);
2498 if (v == NULL)
2499 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002500 if (!PyInt_Check(v)) {
2501 PyErr_SetString(PyExc_TypeError,
2502 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002503 goto error;
2504 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002505 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002506 if (width < 0) {
2507 flags |= F_LJUST;
2508 width = -width;
2509 }
Guido van Rossume5372401993-03-16 12:15:04 +00002510 if (--fmtcnt >= 0)
2511 c = *fmt++;
2512 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002513 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002514 width = c - '0';
2515 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002516 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002517 if (!isdigit(c))
2518 break;
2519 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002520 PyErr_SetString(
2521 PyExc_ValueError,
2522 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002523 goto error;
2524 }
2525 width = width*10 + (c - '0');
2526 }
2527 }
2528 if (c == '.') {
2529 prec = 0;
2530 if (--fmtcnt >= 0)
2531 c = *fmt++;
2532 if (c == '*') {
2533 v = getnextarg(args, arglen, &argidx);
2534 if (v == NULL)
2535 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002536 if (!PyInt_Check(v)) {
2537 PyErr_SetString(
2538 PyExc_TypeError,
2539 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002540 goto error;
2541 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002542 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002543 if (prec < 0)
2544 prec = 0;
2545 if (--fmtcnt >= 0)
2546 c = *fmt++;
2547 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002548 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002549 prec = c - '0';
2550 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002551 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002552 if (!isdigit(c))
2553 break;
2554 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002555 PyErr_SetString(
2556 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002557 "prec too big");
2558 goto error;
2559 }
2560 prec = prec*10 + (c - '0');
2561 }
2562 }
2563 } /* prec */
2564 if (fmtcnt >= 0) {
2565 if (c == 'h' || c == 'l' || c == 'L') {
2566 size = c;
2567 if (--fmtcnt >= 0)
2568 c = *fmt++;
2569 }
2570 }
2571 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002572 PyErr_SetString(PyExc_ValueError,
2573 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002574 goto error;
2575 }
2576 if (c != '%') {
2577 v = getnextarg(args, arglen, &argidx);
2578 if (v == NULL)
2579 goto error;
2580 }
2581 sign = 0;
2582 fill = ' ';
2583 switch (c) {
2584 case '%':
2585 buf = "%";
2586 len = 1;
2587 break;
2588 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002589 case 'r':
2590 if (PyUnicode_Check(v)) {
2591 fmt = fmt_start;
2592 goto unicode;
2593 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002594 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002595 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002596 else
2597 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002598 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002599 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002600 if (!PyString_Check(temp)) {
2601 PyErr_SetString(PyExc_TypeError,
2602 "%s argument has non-string str()");
2603 goto error;
2604 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002605 buf = PyString_AsString(temp);
2606 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002607 if (prec >= 0 && len > prec)
2608 len = prec;
2609 break;
2610 case 'i':
2611 case 'd':
2612 case 'u':
2613 case 'o':
2614 case 'x':
2615 case 'X':
2616 if (c == 'i')
2617 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002618 buf = tmpbuf;
2619 len = formatint(buf, flags, prec, c, v);
2620 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002621 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002622 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002623 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002624 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002625 if ((flags&F_ALT) &&
2626 (c == 'x' || c == 'X') &&
2627 buf[0] == '0' && buf[1] == c) {
2628 *res++ = *buf++;
2629 *res++ = *buf++;
2630 rescnt -= 2;
2631 len -= 2;
2632 width -= 2;
2633 if (width < 0)
2634 width = 0;
2635 }
2636 }
Guido van Rossume5372401993-03-16 12:15:04 +00002637 break;
2638 case 'e':
2639 case 'E':
2640 case 'f':
2641 case 'g':
2642 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002643 buf = tmpbuf;
2644 len = formatfloat(buf, flags, prec, c, v);
2645 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002646 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002647 sign = 1;
2648 if (flags&F_ZERO)
2649 fill = '0';
2650 break;
2651 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002652 buf = tmpbuf;
2653 len = formatchar(buf, v);
2654 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002655 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002656 break;
2657 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002658 PyErr_Format(PyExc_ValueError,
2659 "unsupported format character '%c' (0x%x)",
2660 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002661 goto error;
2662 }
2663 if (sign) {
2664 if (*buf == '-' || *buf == '+') {
2665 sign = *buf++;
2666 len--;
2667 }
2668 else if (flags & F_SIGN)
2669 sign = '+';
2670 else if (flags & F_BLANK)
2671 sign = ' ';
2672 else
2673 sign = '\0';
2674 }
2675 if (width < len)
2676 width = len;
2677 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002678 reslen -= rescnt;
2679 rescnt = width + fmtcnt + 100;
2680 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002681 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002682 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002683 res = PyString_AsString(result)
2684 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002685 }
2686 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002687 if (fill != ' ')
2688 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002689 rescnt--;
2690 if (width > len)
2691 width--;
2692 }
2693 if (width > len && !(flags&F_LJUST)) {
2694 do {
2695 --rescnt;
2696 *res++ = fill;
2697 } while (--width > len);
2698 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002699 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002700 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002701 memcpy(res, buf, len);
2702 res += len;
2703 rescnt -= len;
2704 while (--width >= len) {
2705 --rescnt;
2706 *res++ = ' ';
2707 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002708 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002709 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002710 "not all arguments converted");
2711 goto error;
2712 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002713 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002714 } /* '%' */
2715 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002716 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002717 PyErr_SetString(PyExc_TypeError,
2718 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002719 goto error;
2720 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002721 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002722 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002723 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002724 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002725 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002726
2727 unicode:
2728 if (args_owned) {
2729 Py_DECREF(args);
2730 args_owned = 0;
2731 }
2732 /* Fiddle args right (remove the first argidx-1 arguments) */
2733 --argidx;
2734 if (PyTuple_Check(orig_args) && argidx > 0) {
2735 PyObject *v;
2736 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2737 v = PyTuple_New(n);
2738 if (v == NULL)
2739 goto error;
2740 while (--n >= 0) {
2741 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2742 Py_INCREF(w);
2743 PyTuple_SET_ITEM(v, n, w);
2744 }
2745 args = v;
2746 } else {
2747 Py_INCREF(orig_args);
2748 args = orig_args;
2749 }
2750 /* Paste rest of format string to what we have of the result
2751 string; we reuse result for this */
2752 rescnt = res - PyString_AS_STRING(result);
2753 fmtcnt = PyString_GET_SIZE(format) - \
2754 (fmt - PyString_AS_STRING(format));
2755 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2756 Py_DECREF(args);
2757 goto error;
2758 }
2759 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2760 format = result;
2761 /* Let Unicode do its magic */
2762 result = PyUnicode_Format(format, args);
2763 Py_DECREF(format);
2764 Py_DECREF(args);
2765 return result;
2766
Guido van Rossume5372401993-03-16 12:15:04 +00002767 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002768 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002769 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002770 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002771 }
Guido van Rossume5372401993-03-16 12:15:04 +00002772 return NULL;
2773}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002774
2775
2776#ifdef INTERN_STRINGS
2777
2778static PyObject *interned;
2779
2780void
2781PyString_InternInPlace(p)
2782 PyObject **p;
2783{
2784 register PyStringObject *s = (PyStringObject *)(*p);
2785 PyObject *t;
2786 if (s == NULL || !PyString_Check(s))
2787 Py_FatalError("PyString_InternInPlace: strings only please!");
2788 if ((t = s->ob_sinterned) != NULL) {
2789 if (t == (PyObject *)s)
2790 return;
2791 Py_INCREF(t);
2792 *p = t;
2793 Py_DECREF(s);
2794 return;
2795 }
2796 if (interned == NULL) {
2797 interned = PyDict_New();
2798 if (interned == NULL)
2799 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002800 }
2801 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2802 Py_INCREF(t);
2803 *p = s->ob_sinterned = t;
2804 Py_DECREF(s);
2805 return;
2806 }
2807 t = (PyObject *)s;
2808 if (PyDict_SetItem(interned, t, t) == 0) {
2809 s->ob_sinterned = t;
2810 return;
2811 }
2812 PyErr_Clear();
2813}
2814
2815
2816PyObject *
2817PyString_InternFromString(cp)
2818 const char *cp;
2819{
2820 PyObject *s = PyString_FromString(cp);
2821 if (s == NULL)
2822 return NULL;
2823 PyString_InternInPlace(&s);
2824 return s;
2825}
2826
2827#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002828
2829void
2830PyString_Fini()
2831{
2832 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002833 for (i = 0; i < UCHAR_MAX + 1; i++) {
2834 Py_XDECREF(characters[i]);
2835 characters[i] = NULL;
2836 }
2837#ifndef DONT_SHARE_SHORT_STRINGS
2838 Py_XDECREF(nullstring);
2839 nullstring = NULL;
2840#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002841#ifdef INTERN_STRINGS
2842 if (interned) {
2843 int pos, changed;
2844 PyObject *key, *value;
2845 do {
2846 changed = 0;
2847 pos = 0;
2848 while (PyDict_Next(interned, &pos, &key, &value)) {
2849 if (key->ob_refcnt == 2 && key == value) {
2850 PyDict_DelItem(interned, key);
2851 changed = 1;
2852 }
2853 }
2854 } while (changed);
2855 }
2856#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002857}