blob: 10257f7562dada2fd60acb9a5fd53a78888704ce [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000029
30******************************************************************/
31
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000032/* String object implementation */
33
Guido van Rossumc0b618a1997-05-02 03:12:38 +000034#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000035
Guido van Rossum71160aa1997-06-03 18:03:18 +000036#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000037#include <ctype.h>
38
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many requests were satisfied by the shared
   empty string and by the shared one-character strings, respectively. */
int null_strings;
int one_strings;
#endif
42
#ifdef HAVE_LIMITS_H
#include <limits.h>
#else
#ifndef UCHAR_MAX
#define UCHAR_MAX 255
#endif
#ifndef INT_MAX
#define INT_MAX 2147483647
#endif
#endif
50
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000052#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000053static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000054#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055
56/*
57 Newsizedstringobject() and newstringobject() try in certain cases
58 to share string objects. When the size of the string is zero,
59 these routines always return a pointer to the same string object;
60 when the size is one, they return a pointer to an already existing
61 object if the contents of the string is known. For
62 newstringobject() this is always the case, for
63 newsizedstringobject() this is the case when the first argument in
64 not NULL.
65 A common practice to allocate a string and then fill it in or
66 change it must be done carefully. It is only allowed to change the
67 contents of the string if the obect was gotten from
68 newsizedstringobject() with a NULL first argument, because in the
69 future these routines may try to do even more sharing of objects.
70*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000071PyObject *
72PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000073 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000074 int size;
75{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000077#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 if (size == 0 && (op = nullstring) != NULL) {
79#ifdef COUNT_ALLOCS
80 null_strings++;
81#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
83 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 if (size == 1 && str != NULL &&
86 (op = characters[*str & UCHAR_MAX]) != NULL)
87 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088#ifdef COUNT_ALLOCS
89 one_strings++;
90#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 Py_INCREF(op);
92 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 op = (PyStringObject *)
96 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000097 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return PyErr_NoMemory();
99 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000100 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101#ifdef CACHE_HASH
102 op->ob_shash = -1;
103#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000104#ifdef INTERN_STRINGS
105 op->ob_sinterned = NULL;
106#endif
Guido van Rossumbffd6832000-01-20 22:32:56 +0000107 _Py_NewReference((PyObject *)op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000108 if (str != NULL)
109 memcpy(op->ob_sval, str, size);
110 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0) {
113 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000115 } else if (size == 1 && str != NULL) {
116 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000121}
122
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123PyObject *
124PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000125 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126{
127 register unsigned int size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000129#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 if (size == 0 && (op = nullstring) != NULL) {
131#ifdef COUNT_ALLOCS
132 null_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138#ifdef COUNT_ALLOCS
139 one_strings++;
140#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000144#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000145 op = (PyStringObject *)
146 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000147 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 return PyErr_NoMemory();
149 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000150 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151#ifdef CACHE_HASH
152 op->ob_shash = -1;
153#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000154#ifdef INTERN_STRINGS
155 op->ob_sinterned = NULL;
156#endif
Guido van Rossumbffd6832000-01-20 22:32:56 +0000157 _Py_NewReference((PyObject *)op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000158 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000159#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 if (size == 0) {
161 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000162 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000163 } else if (size == 1) {
164 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000165 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000166 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000167#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000168 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169}
170
Guido van Rossum234f9421993-06-17 12:35:49 +0000171static void
Guido van Rossume5372401993-03-16 12:15:04 +0000172string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000173 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000174{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000175 PyMem_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000176}
177
Guido van Rossumd7047b31995-01-02 19:07:15 +0000178int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000179PyString_Size(op)
180 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000181{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000182 if (!PyString_Check(op)) {
183 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000184 return -1;
185 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000186 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000187}
188
189/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000190PyString_AsString(op)
191 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000192{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000193 if (!PyString_Check(op)) {
194 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 return NULL;
196 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000197 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000198}
199
200/* Methods */
201
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000202static int
Guido van Rossume5372401993-03-16 12:15:04 +0000203string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000204 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 FILE *fp;
206 int flags;
207{
208 int i;
209 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000210 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000211 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000212 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000214 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216
217 /* figure out which quote to use; single is prefered */
218 quote = '\'';
219 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
220 quote = '"';
221
222 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223 for (i = 0; i < op->ob_size; i++) {
224 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000225 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000226 fprintf(fp, "\\%c", c);
227 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000228 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000230 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000232 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000233 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000234}
235
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000236static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000237string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239{
240 /* XXX overflow? */
241 int newsize = 2 + 4 * op->ob_size * sizeof(char);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000242 PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000243 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000244 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000245 }
246 else {
247 register int i;
248 register char c;
249 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000250 int quote;
251
252 /* figure out which quote to use; single is prefered */
253 quote = '\'';
254 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
255 quote = '"';
256
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000257 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 for (i = 0; i < op->ob_size; i++) {
260 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000261 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000262 *p++ = '\\', *p++ = c;
263 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000264 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265 while (*p != '\0')
266 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000267 }
268 else
269 *p++ = c;
270 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000271 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000273 _PyString_Resize(
274 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000275 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277}
278
279static int
Guido van Rossume5372401993-03-16 12:15:04 +0000280string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000281 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000282{
283 return a->ob_size;
284}
285
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000286static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000287string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000288 register PyStringObject *a;
289 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290{
291 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 register PyStringObject *op;
293 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000294 if (PyUnicode_Check(bb))
295 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000296 PyErr_BadArgument();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297 return NULL;
298 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000299#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300 /* Optimize cases with empty left or right operand */
301 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000302 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303 return bb;
304 }
305 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000306 Py_INCREF(a);
307 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000308 }
309 size = a->ob_size + b->ob_size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000310 op = (PyStringObject *)
311 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000312 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000313 return PyErr_NoMemory();
314 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000315 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000316#ifdef CACHE_HASH
317 op->ob_shash = -1;
318#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000319#ifdef INTERN_STRINGS
320 op->ob_sinterned = NULL;
321#endif
Guido van Rossumbffd6832000-01-20 22:32:56 +0000322 _Py_NewReference((PyObject *)op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000323 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
324 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
325 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000326 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000327#undef b
328}
329
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000330static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000331string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000332 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333 register int n;
334{
335 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000336 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000337 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 if (n < 0)
339 n = 0;
340 size = a->ob_size * n;
341 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000342 Py_INCREF(a);
343 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000344 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 op = (PyStringObject *)
346 malloc(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000347 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000348 return PyErr_NoMemory();
349 op->ob_type = &PyString_Type;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000350 op->ob_size = size;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000351#ifdef CACHE_HASH
352 op->ob_shash = -1;
353#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000354#ifdef INTERN_STRINGS
355 op->ob_sinterned = NULL;
356#endif
Guido van Rossumbffd6832000-01-20 22:32:56 +0000357 _Py_NewReference((PyObject *)op);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000358 for (i = 0; i < size; i += a->ob_size)
359 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
360 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000361 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000362}
363
364/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
365
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000367string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000368 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000369 register int i, j; /* May be negative! */
370{
371 if (i < 0)
372 i = 0;
373 if (j < 0)
374 j = 0; /* Avoid signed/unsigned bug in next line */
375 if (j > a->ob_size)
376 j = a->ob_size;
377 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000378 Py_INCREF(a);
379 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380 }
381 if (j < i)
382 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000383 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000384}
385
Guido van Rossum9284a572000-03-07 15:53:43 +0000386static int
387string_contains(a, el)
388PyObject *a, *el;
389{
390 register char *s, *end;
391 register char c;
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000392 if (!PyString_Check(el))
393 return PyUnicode_Contains(a, el);
394 if (PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000395 PyErr_SetString(PyExc_TypeError,
396 "string member test needs char left operand");
397 return -1;
398 }
399 c = PyString_AsString(el)[0];
400 s = PyString_AsString(a);
401 end = s + PyString_Size(a);
402 while (s < end) {
403 if (c == *s++)
404 return 1;
405 }
406 return 0;
407}
408
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000410string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000411 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 register int i;
413{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000414 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000416 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000418 return NULL;
419 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000420 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000422#ifdef COUNT_ALLOCS
423 if (v != NULL)
424 one_strings++;
425#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000426 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000428 if (v == NULL)
429 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430 characters[c] = (PyStringObject *) v;
431 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000432 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000434 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000435}
436
437static int
Guido van Rossume5372401993-03-16 12:15:04 +0000438string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440{
Guido van Rossum253919f1991-02-13 23:18:39 +0000441 int len_a = a->ob_size, len_b = b->ob_size;
442 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000443 int cmp;
444 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000445 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000446 if (cmp == 0)
447 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
448 if (cmp != 0)
449 return cmp;
450 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000451 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000452}
453
Guido van Rossum9bfef441993-03-29 10:43:31 +0000454static long
455string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000456 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000457{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000458 register int len;
459 register unsigned char *p;
460 register long x;
461
462#ifdef CACHE_HASH
463 if (a->ob_shash != -1)
464 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000465#ifdef INTERN_STRINGS
466 if (a->ob_sinterned != NULL)
467 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000468 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000469#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000470#endif
471 len = a->ob_size;
472 p = (unsigned char *) a->ob_sval;
473 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000474 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000475 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000476 x ^= a->ob_size;
477 if (x == -1)
478 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000479#ifdef CACHE_HASH
480 a->ob_shash = x;
481#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000482 return x;
483}
484
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000485static int
486string_buffer_getreadbuf(self, index, ptr)
487 PyStringObject *self;
488 int index;
489 const void **ptr;
490{
491 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000492 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000493 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000494 return -1;
495 }
496 *ptr = (void *)self->ob_sval;
497 return self->ob_size;
498}
499
500static int
501string_buffer_getwritebuf(self, index, ptr)
502 PyStringObject *self;
503 int index;
504 const void **ptr;
505{
Guido van Rossum045e6881997-09-08 18:30:11 +0000506 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000507 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000508 return -1;
509}
510
511static int
512string_buffer_getsegcount(self, lenp)
513 PyStringObject *self;
514 int *lenp;
515{
516 if ( lenp )
517 *lenp = self->ob_size;
518 return 1;
519}
520
Guido van Rossum1db70701998-10-08 02:18:52 +0000521static int
522string_buffer_getcharbuf(self, index, ptr)
523 PyStringObject *self;
524 int index;
525 const char **ptr;
526{
527 if ( index != 0 ) {
528 PyErr_SetString(PyExc_SystemError,
529 "accessing non-existent string segment");
530 return -1;
531 }
532 *ptr = self->ob_sval;
533 return self->ob_size;
534}
535
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000536static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000537 (inquiry)string_length, /*sq_length*/
538 (binaryfunc)string_concat, /*sq_concat*/
539 (intargfunc)string_repeat, /*sq_repeat*/
540 (intargfunc)string_item, /*sq_item*/
541 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000542 0, /*sq_ass_item*/
543 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000544 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000545};
546
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000547static PyBufferProcs string_as_buffer = {
548 (getreadbufferproc)string_buffer_getreadbuf,
549 (getwritebufferproc)string_buffer_getwritebuf,
550 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000551 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000552};
553
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000554
555
/* Strip-mode selectors.  NOTE(review): presumably consumed by the
   strip/lstrip/rstrip implementation further down the file -- confirm. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
559
560
561static PyObject *
562split_whitespace(s, len, maxsplit)
563 char *s;
564 int len;
565 int maxsplit;
566{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000567 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000568 PyObject* item;
569 PyObject *list = PyList_New(0);
570
571 if (list == NULL)
572 return NULL;
573
Guido van Rossum4c08d552000-03-10 22:55:18 +0000574 for (i = j = 0; i < len; ) {
575 while (i < len && isspace(Py_CHARMASK(s[i])))
576 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000577 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000578 while (i < len && !isspace(Py_CHARMASK(s[i])))
579 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000580 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000581 if (maxsplit-- <= 0)
582 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000583 item = PyString_FromStringAndSize(s+j, (int)(i-j));
584 if (item == NULL)
585 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000586 err = PyList_Append(list, item);
587 Py_DECREF(item);
588 if (err < 0)
589 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000590 while (i < len && isspace(Py_CHARMASK(s[i])))
591 i++;
592 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000593 }
594 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000595 if (j < len) {
596 item = PyString_FromStringAndSize(s+j, (int)(len - j));
597 if (item == NULL)
598 goto finally;
599 err = PyList_Append(list, item);
600 Py_DECREF(item);
601 if (err < 0)
602 goto finally;
603 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000604 return list;
605 finally:
606 Py_DECREF(list);
607 return NULL;
608}
609
610
611static char split__doc__[] =
612"S.split([sep [,maxsplit]]) -> list of strings\n\
613\n\
614Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000615delimiter string. If maxsplit is given, at most maxsplit\n\
616splits are done. If sep is not specified, any whitespace string\n\
617is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000618
619static PyObject *
620string_split(self, args)
621 PyStringObject *self;
622 PyObject *args;
623{
624 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 int maxsplit = -1;
626 const char *s = PyString_AS_STRING(self), *sub;
627 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000628
Guido van Rossum4c08d552000-03-10 22:55:18 +0000629 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000630 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000631 if (maxsplit < 0)
632 maxsplit = INT_MAX;
633 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000634 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000635 if (PyString_Check(subobj)) {
636 sub = PyString_AS_STRING(subobj);
637 n = PyString_GET_SIZE(subobj);
638 }
639 else if (PyUnicode_Check(subobj))
640 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
641 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
642 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000643 if (n == 0) {
644 PyErr_SetString(PyExc_ValueError, "empty separator");
645 return NULL;
646 }
647
648 list = PyList_New(0);
649 if (list == NULL)
650 return NULL;
651
652 i = j = 0;
653 while (i+n <= len) {
654 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000655 if (maxsplit-- <= 0)
656 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000657 item = PyString_FromStringAndSize(s+j, (int)(i-j));
658 if (item == NULL)
659 goto fail;
660 err = PyList_Append(list, item);
661 Py_DECREF(item);
662 if (err < 0)
663 goto fail;
664 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000665 }
666 else
667 i++;
668 }
669 item = PyString_FromStringAndSize(s+j, (int)(len-j));
670 if (item == NULL)
671 goto fail;
672 err = PyList_Append(list, item);
673 Py_DECREF(item);
674 if (err < 0)
675 goto fail;
676
677 return list;
678
679 fail:
680 Py_DECREF(list);
681 return NULL;
682}
683
684
685static char join__doc__[] =
686"S.join(sequence) -> string\n\
687\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000688Return a string which is the concatenation of the strings in the\n\
689sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690
691static PyObject *
692string_join(self, args)
693 PyStringObject *self;
694 PyObject *args;
695{
696 char *sep = PyString_AS_STRING(self);
697 int seplen = PyString_GET_SIZE(self);
698 PyObject *res = NULL;
699 int reslen = 0;
700 char *p;
701 int seqlen = 0;
702 int sz = 100;
703 int i, slen;
704 PyObject *seq;
705
Guido van Rossum43713e52000-02-29 13:59:29 +0000706 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000707 return NULL;
708
709 seqlen = PySequence_Length(seq);
710 if (seqlen < 0 && PyErr_Occurred())
711 return NULL;
712
713 if (seqlen == 1) {
714 /* Optimization if there's only one item */
715 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000716 if (item == NULL)
717 return NULL;
718 if (!PyString_Check(item) &&
719 !PyUnicode_Check(item)) {
720 PyErr_SetString(PyExc_TypeError,
721 "first argument must be sequence of strings");
722 Py_DECREF(item);
723 return NULL;
724 }
725 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000726 }
727 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
728 return NULL;
729 p = PyString_AsString(res);
730
731 /* optimize for lists. all others (tuples and arbitrary sequences)
732 * just use the abstract interface.
733 */
734 if (PyList_Check(seq)) {
735 for (i = 0; i < seqlen; i++) {
736 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000737 if (!PyString_Check(item)){
738 if (PyUnicode_Check(item)) {
739 Py_DECREF(res);
740 return PyUnicode_Join(
741 (PyObject *)self,
742 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000743 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000744 PyErr_Format(PyExc_TypeError,
745 "sequence item %i not a string",
746 i);
747 goto finally;
748 }
749 slen = PyString_GET_SIZE(item);
750 while (reslen + slen + seplen >= sz) {
751 if (_PyString_Resize(&res, sz*2))
752 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000753 sz *= 2;
754 p = PyString_AsString(res) + reslen;
755 }
756 if (i > 0) {
757 memcpy(p, sep, seplen);
758 p += seplen;
759 reslen += seplen;
760 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000761 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 p += slen;
763 reslen += slen;
764 }
765 }
766 else {
767 for (i = 0; i < seqlen; i++) {
768 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000769 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000770 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000771 if (!PyString_Check(item)){
772 if (PyUnicode_Check(item)) {
773 Py_DECREF(res);
774 Py_DECREF(item);
775 return PyUnicode_Join(
776 (PyObject *)self,
777 seq);
778 }
779 Py_DECREF(item);
780 PyErr_Format(PyExc_TypeError,
781 "sequence item %i not a string",
782 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000783 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000784 }
785 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000786 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000787 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000788 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000790 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000791 sz *= 2;
792 p = PyString_AsString(res) + reslen;
793 }
794 if (i > 0) {
795 memcpy(p, sep, seplen);
796 p += seplen;
797 reslen += seplen;
798 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000799 memcpy(p, PyString_AS_STRING(item), slen);
800 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000801 p += slen;
802 reslen += slen;
803 }
804 }
805 if (_PyString_Resize(&res, reslen))
806 goto finally;
807 return res;
808
809 finally:
810 Py_DECREF(res);
811 return NULL;
812}
813
814
815
816static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000818 PyStringObject *self;
819 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000820 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000822 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000823 int len = PyString_GET_SIZE(self);
824 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000825 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000826
Guido van Rossum4c08d552000-03-10 22:55:18 +0000827 if (!PyArg_ParseTuple(args, "O|ii:find/rfind/index/rindex",
828 &subobj, &i, &last))
829 return -2;
830 if (PyString_Check(subobj)) {
831 sub = PyString_AS_STRING(subobj);
832 n = PyString_GET_SIZE(subobj);
833 }
834 else if (PyUnicode_Check(subobj))
835 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
836 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000837 return -2;
838
839 if (last > len)
840 last = len;
841 if (last < 0)
842 last += len;
843 if (last < 0)
844 last = 0;
845 if (i < 0)
846 i += len;
847 if (i < 0)
848 i = 0;
849
Guido van Rossum4c08d552000-03-10 22:55:18 +0000850 if (dir > 0) {
851 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000852 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000853 last -= n;
854 for (; i <= last; ++i)
855 if (s[i] == sub[0] &&
856 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
857 return (long)i;
858 }
859 else {
860 int j;
861
862 if (n == 0 && i <= last)
863 return (long)last;
864 for (j = last-n; j >= i; --j)
865 if (s[j] == sub[0] &&
866 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
867 return (long)j;
868 }
869
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000870 return -1;
871}
872
873
874static char find__doc__[] =
875"S.find(sub [,start [,end]]) -> int\n\
876\n\
877Return the lowest index in S where substring sub is found,\n\
878such that sub is contained within s[start,end]. Optional\n\
879arguments start and end are interpreted as in slice notation.\n\
880\n\
881Return -1 on failure.";
882
883static PyObject *
884string_find(self, args)
885 PyStringObject *self;
886 PyObject *args;
887{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000888 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889 if (result == -2)
890 return NULL;
891 return PyInt_FromLong(result);
892}
893
894
895static char index__doc__[] =
896"S.index(sub [,start [,end]]) -> int\n\
897\n\
898Like S.find() but raise ValueError when the substring is not found.";
899
900static PyObject *
901string_index(self, args)
902 PyStringObject *self;
903 PyObject *args;
904{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000905 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000906 if (result == -2)
907 return NULL;
908 if (result == -1) {
909 PyErr_SetString(PyExc_ValueError,
910 "substring not found in string.index");
911 return NULL;
912 }
913 return PyInt_FromLong(result);
914}
915
916
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000917static char rfind__doc__[] =
918"S.rfind(sub [,start [,end]]) -> int\n\
919\n\
920Return the highest index in S where substring sub is found,\n\
921such that sub is contained within s[start,end]. Optional\n\
922arguments start and end are interpreted as in slice notation.\n\
923\n\
924Return -1 on failure.";
925
926static PyObject *
927string_rfind(self, args)
928 PyStringObject *self;
929 PyObject *args;
930{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000931 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000932 if (result == -2)
933 return NULL;
934 return PyInt_FromLong(result);
935}
936
937
938static char rindex__doc__[] =
939"S.rindex(sub [,start [,end]]) -> int\n\
940\n\
941Like S.rfind() but raise ValueError when the substring is not found.";
942
943static PyObject *
944string_rindex(self, args)
945 PyStringObject *self;
946 PyObject *args;
947{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000949 if (result == -2)
950 return NULL;
951 if (result == -1) {
952 PyErr_SetString(PyExc_ValueError,
953 "substring not found in string.rindex");
954 return NULL;
955 }
956 return PyInt_FromLong(result);
957}
958
959
960static PyObject *
961do_strip(self, args, striptype)
962 PyStringObject *self;
963 PyObject *args;
964 int striptype;
965{
966 char *s = PyString_AS_STRING(self);
967 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000968
Guido van Rossum43713e52000-02-29 13:59:29 +0000969 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000970 return NULL;
971
972 i = 0;
973 if (striptype != RIGHTSTRIP) {
974 while (i < len && isspace(Py_CHARMASK(s[i]))) {
975 i++;
976 }
977 }
978
979 j = len;
980 if (striptype != LEFTSTRIP) {
981 do {
982 j--;
983 } while (j >= i && isspace(Py_CHARMASK(s[j])));
984 j++;
985 }
986
987 if (i == 0 && j == len) {
988 Py_INCREF(self);
989 return (PyObject*)self;
990 }
991 else
992 return PyString_FromStringAndSize(s+i, j-i);
993}
994
995
996static char strip__doc__[] =
997"S.strip() -> string\n\
998\n\
999Return a copy of the string S with leading and trailing\n\
1000whitespace removed.";
1001
1002static PyObject *
1003string_strip(self, args)
1004 PyStringObject *self;
1005 PyObject *args;
1006{
1007 return do_strip(self, args, BOTHSTRIP);
1008}
1009
1010
1011static char lstrip__doc__[] =
1012"S.lstrip() -> string\n\
1013\n\
1014Return a copy of the string S with leading whitespace removed.";
1015
1016static PyObject *
1017string_lstrip(self, args)
1018 PyStringObject *self;
1019 PyObject *args;
1020{
1021 return do_strip(self, args, LEFTSTRIP);
1022}
1023
1024
1025static char rstrip__doc__[] =
1026"S.rstrip() -> string\n\
1027\n\
1028Return a copy of the string S with trailing whitespace removed.";
1029
1030static PyObject *
1031string_rstrip(self, args)
1032 PyStringObject *self;
1033 PyObject *args;
1034{
1035 return do_strip(self, args, RIGHTSTRIP);
1036}
1037
1038
1039static char lower__doc__[] =
1040"S.lower() -> string\n\
1041\n\
1042Return a copy of the string S converted to lowercase.";
1043
1044static PyObject *
1045string_lower(self, args)
1046 PyStringObject *self;
1047 PyObject *args;
1048{
1049 char *s = PyString_AS_STRING(self), *s_new;
1050 int i, n = PyString_GET_SIZE(self);
1051 PyObject *new;
1052
Guido van Rossum43713e52000-02-29 13:59:29 +00001053 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054 return NULL;
1055 new = PyString_FromStringAndSize(NULL, n);
1056 if (new == NULL)
1057 return NULL;
1058 s_new = PyString_AsString(new);
1059 for (i = 0; i < n; i++) {
1060 int c = Py_CHARMASK(*s++);
1061 if (isupper(c)) {
1062 *s_new = tolower(c);
1063 } else
1064 *s_new = c;
1065 s_new++;
1066 }
1067 return new;
1068}
1069
1070
1071static char upper__doc__[] =
1072"S.upper() -> string\n\
1073\n\
1074Return a copy of the string S converted to uppercase.";
1075
1076static PyObject *
1077string_upper(self, args)
1078 PyStringObject *self;
1079 PyObject *args;
1080{
1081 char *s = PyString_AS_STRING(self), *s_new;
1082 int i, n = PyString_GET_SIZE(self);
1083 PyObject *new;
1084
Guido van Rossum43713e52000-02-29 13:59:29 +00001085 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086 return NULL;
1087 new = PyString_FromStringAndSize(NULL, n);
1088 if (new == NULL)
1089 return NULL;
1090 s_new = PyString_AsString(new);
1091 for (i = 0; i < n; i++) {
1092 int c = Py_CHARMASK(*s++);
1093 if (islower(c)) {
1094 *s_new = toupper(c);
1095 } else
1096 *s_new = c;
1097 s_new++;
1098 }
1099 return new;
1100}
1101
1102
Guido van Rossum4c08d552000-03-10 22:55:18 +00001103static char title__doc__[] =
1104"S.title() -> string\n\
1105\n\
1106Return a titlecased version of S, i.e. words start with uppercase\n\
1107characters, all remaining cased characters have lowercase.";
1108
1109static PyObject*
1110string_title(PyUnicodeObject *self, PyObject *args)
1111{
1112 char *s = PyString_AS_STRING(self), *s_new;
1113 int i, n = PyString_GET_SIZE(self);
1114 int previous_is_cased = 0;
1115 PyObject *new;
1116
1117 if (!PyArg_ParseTuple(args, ":title"))
1118 return NULL;
1119 new = PyString_FromStringAndSize(NULL, n);
1120 if (new == NULL)
1121 return NULL;
1122 s_new = PyString_AsString(new);
1123 for (i = 0; i < n; i++) {
1124 int c = Py_CHARMASK(*s++);
1125 if (islower(c)) {
1126 if (!previous_is_cased)
1127 c = toupper(c);
1128 previous_is_cased = 1;
1129 } else if (isupper(c)) {
1130 if (previous_is_cased)
1131 c = tolower(c);
1132 previous_is_cased = 1;
1133 } else
1134 previous_is_cased = 0;
1135 *s_new++ = c;
1136 }
1137 return new;
1138}
1139
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140static char capitalize__doc__[] =
1141"S.capitalize() -> string\n\
1142\n\
1143Return a copy of the string S with only its first character\n\
1144capitalized.";
1145
1146static PyObject *
1147string_capitalize(self, args)
1148 PyStringObject *self;
1149 PyObject *args;
1150{
1151 char *s = PyString_AS_STRING(self), *s_new;
1152 int i, n = PyString_GET_SIZE(self);
1153 PyObject *new;
1154
Guido van Rossum43713e52000-02-29 13:59:29 +00001155 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001156 return NULL;
1157 new = PyString_FromStringAndSize(NULL, n);
1158 if (new == NULL)
1159 return NULL;
1160 s_new = PyString_AsString(new);
1161 if (0 < n) {
1162 int c = Py_CHARMASK(*s++);
1163 if (islower(c))
1164 *s_new = toupper(c);
1165 else
1166 *s_new = c;
1167 s_new++;
1168 }
1169 for (i = 1; i < n; i++) {
1170 int c = Py_CHARMASK(*s++);
1171 if (isupper(c))
1172 *s_new = tolower(c);
1173 else
1174 *s_new = c;
1175 s_new++;
1176 }
1177 return new;
1178}
1179
1180
1181static char count__doc__[] =
1182"S.count(sub[, start[, end]]) -> int\n\
1183\n\
1184Return the number of occurrences of substring sub in string\n\
1185S[start:end]. Optional arguments start and end are\n\
1186interpreted as in slice notation.";
1187
1188static PyObject *
1189string_count(self, args)
1190 PyStringObject *self;
1191 PyObject *args;
1192{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001193 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001194 int len = PyString_GET_SIZE(self), n;
1195 int i = 0, last = INT_MAX;
1196 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001197 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001198
Guido van Rossum4c08d552000-03-10 22:55:18 +00001199 if (!PyArg_ParseTuple(args, "O|ii:count", &subobj, &i, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001200 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001201 if (PyString_Check(subobj)) {
1202 sub = PyString_AS_STRING(subobj);
1203 n = PyString_GET_SIZE(subobj);
1204 }
1205 else if (PyUnicode_Check(subobj))
1206 return PyInt_FromLong(
1207 PyUnicode_Count((PyObject *)self, subobj, i, last));
1208 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1209 return NULL;
1210
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001211 if (last > len)
1212 last = len;
1213 if (last < 0)
1214 last += len;
1215 if (last < 0)
1216 last = 0;
1217 if (i < 0)
1218 i += len;
1219 if (i < 0)
1220 i = 0;
1221 m = last + 1 - n;
1222 if (n == 0)
1223 return PyInt_FromLong((long) (m-i));
1224
1225 r = 0;
1226 while (i < m) {
1227 if (!memcmp(s+i, sub, n)) {
1228 r++;
1229 i += n;
1230 } else {
1231 i++;
1232 }
1233 }
1234 return PyInt_FromLong((long) r);
1235}
1236
1237
1238static char swapcase__doc__[] =
1239"S.swapcase() -> string\n\
1240\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001241Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242converted to lowercase and vice versa.";
1243
1244static PyObject *
1245string_swapcase(self, args)
1246 PyStringObject *self;
1247 PyObject *args;
1248{
1249 char *s = PyString_AS_STRING(self), *s_new;
1250 int i, n = PyString_GET_SIZE(self);
1251 PyObject *new;
1252
Guido van Rossum43713e52000-02-29 13:59:29 +00001253 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001254 return NULL;
1255 new = PyString_FromStringAndSize(NULL, n);
1256 if (new == NULL)
1257 return NULL;
1258 s_new = PyString_AsString(new);
1259 for (i = 0; i < n; i++) {
1260 int c = Py_CHARMASK(*s++);
1261 if (islower(c)) {
1262 *s_new = toupper(c);
1263 }
1264 else if (isupper(c)) {
1265 *s_new = tolower(c);
1266 }
1267 else
1268 *s_new = c;
1269 s_new++;
1270 }
1271 return new;
1272}
1273
1274
1275static char translate__doc__[] =
1276"S.translate(table [,deletechars]) -> string\n\
1277\n\
1278Return a copy of the string S, where all characters occurring\n\
1279in the optional argument deletechars are removed, and the\n\
1280remaining characters have been mapped through the given\n\
1281translation table, which must be a string of length 256.";
1282
1283static PyObject *
1284string_translate(self, args)
1285 PyStringObject *self;
1286 PyObject *args;
1287{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001288 register char *input, *output;
1289 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 register int i, c, changed = 0;
1291 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293 int inlen, tablen, dellen = 0;
1294 PyObject *result;
1295 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001297
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 if (!PyArg_ParseTuple(args, "O|O:translate",
1299 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301
1302 if (PyString_Check(tableobj)) {
1303 table1 = PyString_AS_STRING(tableobj);
1304 tablen = PyString_GET_SIZE(tableobj);
1305 }
1306 else if (PyUnicode_Check(tableobj)) {
1307 /* Unicode .translate() does not support the deletechars
1308 parameter; instead a mapping to None will cause characters
1309 to be deleted. */
1310 if (delobj != NULL) {
1311 PyErr_SetString(PyExc_TypeError,
1312 "deletions are implemented differently for unicode");
1313 return NULL;
1314 }
1315 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1316 }
1317 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319
1320 if (delobj != NULL) {
1321 if (PyString_Check(delobj)) {
1322 del_table = PyString_AS_STRING(delobj);
1323 dellen = PyString_GET_SIZE(delobj);
1324 }
1325 else if (PyUnicode_Check(delobj)) {
1326 PyErr_SetString(PyExc_TypeError,
1327 "deletions are implemented differently for unicode");
1328 return NULL;
1329 }
1330 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1331 return NULL;
1332
1333 if (tablen != 256) {
1334 PyErr_SetString(PyExc_ValueError,
1335 "translation table must be 256 characters long");
1336 return NULL;
1337 }
1338 }
1339 else {
1340 del_table = NULL;
1341 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342 }
1343
1344 table = table1;
1345 inlen = PyString_Size(input_obj);
1346 result = PyString_FromStringAndSize((char *)NULL, inlen);
1347 if (result == NULL)
1348 return NULL;
1349 output_start = output = PyString_AsString(result);
1350 input = PyString_AsString(input_obj);
1351
1352 if (dellen == 0) {
1353 /* If no deletions are required, use faster code */
1354 for (i = inlen; --i >= 0; ) {
1355 c = Py_CHARMASK(*input++);
1356 if (Py_CHARMASK((*output++ = table[c])) != c)
1357 changed = 1;
1358 }
1359 if (changed)
1360 return result;
1361 Py_DECREF(result);
1362 Py_INCREF(input_obj);
1363 return input_obj;
1364 }
1365
1366 for (i = 0; i < 256; i++)
1367 trans_table[i] = Py_CHARMASK(table[i]);
1368
1369 for (i = 0; i < dellen; i++)
1370 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1371
1372 for (i = inlen; --i >= 0; ) {
1373 c = Py_CHARMASK(*input++);
1374 if (trans_table[c] != -1)
1375 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1376 continue;
1377 changed = 1;
1378 }
1379 if (!changed) {
1380 Py_DECREF(result);
1381 Py_INCREF(input_obj);
1382 return input_obj;
1383 }
1384 /* Fix the size of the resulting string */
1385 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1386 return NULL;
1387 return result;
1388}
1389
1390
1391/* What follows is used for implementing replace(). Perry Stoll. */
1392
/*
  mymemfind

  strstr-like search over arbitrary (possibly NUL-containing) memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of the match within MEM, or -1 when
  there is none.  A pattern longer than the memory block never matches.
*/
static int
mymemfind(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	register int pos;
	/* highest offset at which a complete pattern still fits */
	int last_start = len - pat_len;

	for (pos = 0; pos <= last_start; pos++) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] != pat[0])
			continue;
		if (pat_len == 1 ||
		    memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
			return pos;
	}
	return -1;
}
1424
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning left
   to right and resuming right after each match.
   So mem=1111 with pat==11 gives 2,
   and mem=11111 with pat==11 gives 2 as well.
 */
static int
mymemcnt(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	register int where;
	int hits = 0;

	while (len >= 0 &&
	       (where = mymemfind(mem, len, pat, pat_len)) != -1) {
		/* step over the unmatched prefix and the match itself */
		int consumed = where + pat_len;
		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
1452
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "replace all".

   If length of PAT is greater than length of STR, or PAT is empty, or
   there are no occurrences of PAT in STR, or COUNT is 0, then the
   original string is returned.  Otherwise, a new string is allocated
   here and returned; the caller releases it with free().

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
	char *str;
	int len;     /* input string */
	char *pat;
	int pat_len; /* pattern string to find */
	char *sub;
	int sub_len; /* substitution string */
	int count;   /* number of replacements */
	int *out_len;

{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	/* an empty pattern would never advance the search below */
	if (len == 0 || pat_len <= 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* refuse results whose length does not fit in an int; the
	   caller reports NULL as an out-of-memory condition */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* malloc(0) is allowed to return NULL, which would be mistaken
	   for an allocation failure when every byte is replaced by
	   nothing; always request at least one byte */
	new_s = (char *)malloc(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1535
1536
1537static char replace__doc__[] =
1538"S.replace (old, new[, maxsplit]) -> string\n\
1539\n\
1540Return a copy of string S with all occurrences of substring\n\
1541old replaced by new. If the optional argument maxsplit is\n\
1542given, only the first maxsplit occurrences are replaced.";
1543
1544static PyObject *
1545string_replace(self, args)
1546 PyStringObject *self;
1547 PyObject *args;
1548{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 const char *str = PyString_AS_STRING(self), *sub, *repl;
1550 char *new_s;
1551 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1552 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555
Guido van Rossum4c08d552000-03-10 22:55:18 +00001556 if (!PyArg_ParseTuple(args, "OO|i:replace",
1557 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001558 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001559
1560 if (PyString_Check(subobj)) {
1561 sub = PyString_AS_STRING(subobj);
1562 sub_len = PyString_GET_SIZE(subobj);
1563 }
1564 else if (PyUnicode_Check(subobj))
1565 return PyUnicode_Replace((PyObject *)self,
1566 subobj, replobj, count);
1567 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1568 return NULL;
1569
1570 if (PyString_Check(replobj)) {
1571 repl = PyString_AS_STRING(replobj);
1572 repl_len = PyString_GET_SIZE(replobj);
1573 }
1574 else if (PyUnicode_Check(replobj))
1575 return PyUnicode_Replace((PyObject *)self,
1576 subobj, replobj, count);
1577 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1578 return NULL;
1579
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001580 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001581 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582 return NULL;
1583 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 if (new_s == NULL) {
1586 PyErr_NoMemory();
1587 return NULL;
1588 }
1589 if (out_len == -1) {
1590 /* we're returning another reference to self */
1591 new = (PyObject*)self;
1592 Py_INCREF(new);
1593 }
1594 else {
1595 new = PyString_FromStringAndSize(new_s, out_len);
1596 free(new_s);
1597 }
1598 return new;
1599}
1600
1601
1602static char startswith__doc__[] =
1603"S.startswith(prefix[, start[, end]]) -> int\n\
1604\n\
1605Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1606optional start, test S beginning at that position. With optional end, stop\n\
1607comparing S at that position.";
1608
1609static PyObject *
1610string_startswith(self, args)
1611 PyStringObject *self;
1612 PyObject *args;
1613{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001614 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 int plen;
1618 int start = 0;
1619 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001620 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622 if (!PyArg_ParseTuple(args, "O|ii:startswith", &subobj, &start, &end))
1623 return NULL;
1624 if (PyString_Check(subobj)) {
1625 prefix = PyString_AS_STRING(subobj);
1626 plen = PyString_GET_SIZE(subobj);
1627 }
1628 else if (PyUnicode_Check(subobj))
1629 return PyInt_FromLong(
1630 PyUnicode_Tailmatch((PyObject *)self,
1631 subobj, start, end, -1));
1632 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633 return NULL;
1634
1635 /* adopt Java semantics for index out of range. it is legal for
1636 * offset to be == plen, but this only returns true if prefix is
1637 * the empty string.
1638 */
1639 if (start < 0 || start+plen > len)
1640 return PyInt_FromLong(0);
1641
1642 if (!memcmp(str+start, prefix, plen)) {
1643 /* did the match end after the specified end? */
1644 if (end < 0)
1645 return PyInt_FromLong(1);
1646 else if (end - start < plen)
1647 return PyInt_FromLong(0);
1648 else
1649 return PyInt_FromLong(1);
1650 }
1651 else return PyInt_FromLong(0);
1652}
1653
1654
1655static char endswith__doc__[] =
1656"S.endswith(suffix[, start[, end]]) -> int\n\
1657\n\
1658Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1659optional start, test S beginning at that position. With optional end, stop\n\
1660comparing S at that position.";
1661
1662static PyObject *
1663string_endswith(self, args)
1664 PyStringObject *self;
1665 PyObject *args;
1666{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001667 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001669 const char* suffix;
1670 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671 int start = 0;
1672 int end = -1;
1673 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675
Guido van Rossum4c08d552000-03-10 22:55:18 +00001676 if (!PyArg_ParseTuple(args, "O|ii:endswith", &subobj, &start, &end))
1677 return NULL;
1678 if (PyString_Check(subobj)) {
1679 suffix = PyString_AS_STRING(subobj);
1680 slen = PyString_GET_SIZE(subobj);
1681 }
1682 else if (PyUnicode_Check(subobj))
1683 return PyInt_FromLong(
1684 PyUnicode_Tailmatch((PyObject *)self,
1685 subobj, start, end, +1));
1686 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687 return NULL;
1688
Guido van Rossum4c08d552000-03-10 22:55:18 +00001689 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690 return PyInt_FromLong(0);
1691
1692 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694
Guido van Rossum4c08d552000-03-10 22:55:18 +00001695 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696 return PyInt_FromLong(1);
1697 else return PyInt_FromLong(0);
1698}
1699
1700
Guido van Rossum4c08d552000-03-10 22:55:18 +00001701static char expandtabs__doc__[] =
1702"S.expandtabs([tabsize]) -> string\n\
1703\n\
1704Return a copy of S where all tab characters are expanded using spaces.\n\
1705If tabsize is not given, a tab size of 8 characters is assumed.";
1706
1707static PyObject*
1708string_expandtabs(PyStringObject *self, PyObject *args)
1709{
1710 const char *e, *p;
1711 char *q;
1712 int i, j;
1713 PyObject *u;
1714 int tabsize = 8;
1715
1716 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1717 return NULL;
1718
1719 /* First pass: determine size of ouput string */
1720 i = j = 0;
1721 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1722 for (p = PyString_AS_STRING(self); p < e; p++)
1723 if (*p == '\t') {
1724 if (tabsize > 0)
1725 j += tabsize - (j % tabsize);
1726 }
1727 else {
1728 j++;
1729 if (*p == '\n' || *p == '\r') {
1730 i += j;
1731 j = 0;
1732 }
1733 }
1734
1735 /* Second pass: create output string and fill it */
1736 u = PyString_FromStringAndSize(NULL, i + j);
1737 if (!u)
1738 return NULL;
1739
1740 j = 0;
1741 q = PyString_AS_STRING(u);
1742
1743 for (p = PyString_AS_STRING(self); p < e; p++)
1744 if (*p == '\t') {
1745 if (tabsize > 0) {
1746 i = tabsize - (j % tabsize);
1747 j += i;
1748 while (i--)
1749 *q++ = ' ';
1750 }
1751 }
1752 else {
1753 j++;
1754 *q++ = *p;
1755 if (*p == '\n' || *p == '\r')
1756 j = 0;
1757 }
1758
1759 return u;
1760}
1761
1762static
1763PyObject *pad(PyStringObject *self,
1764 int left,
1765 int right,
1766 char fill)
1767{
1768 PyObject *u;
1769
1770 if (left < 0)
1771 left = 0;
1772 if (right < 0)
1773 right = 0;
1774
1775 if (left == 0 && right == 0) {
1776 Py_INCREF(self);
1777 return (PyObject *)self;
1778 }
1779
1780 u = PyString_FromStringAndSize(NULL,
1781 left + PyString_GET_SIZE(self) + right);
1782 if (u) {
1783 if (left)
1784 memset(PyString_AS_STRING(u), fill, left);
1785 memcpy(PyString_AS_STRING(u) + left,
1786 PyString_AS_STRING(self),
1787 PyString_GET_SIZE(self));
1788 if (right)
1789 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1790 fill, right);
1791 }
1792
1793 return u;
1794}
1795
1796static char ljust__doc__[] =
1797"S.ljust(width) -> string\n\
1798\n\
1799Return S left justified in a string of length width. Padding is\n\
1800done using spaces.";
1801
1802static PyObject *
1803string_ljust(PyStringObject *self, PyObject *args)
1804{
1805 int width;
1806 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1807 return NULL;
1808
1809 if (PyString_GET_SIZE(self) >= width) {
1810 Py_INCREF(self);
1811 return (PyObject*) self;
1812 }
1813
1814 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1815}
1816
1817
1818static char rjust__doc__[] =
1819"S.rjust(width) -> string\n\
1820\n\
1821Return S right justified in a string of length width. Padding is\n\
1822done using spaces.";
1823
1824static PyObject *
1825string_rjust(PyStringObject *self, PyObject *args)
1826{
1827 int width;
1828 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1829 return NULL;
1830
1831 if (PyString_GET_SIZE(self) >= width) {
1832 Py_INCREF(self);
1833 return (PyObject*) self;
1834 }
1835
1836 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1837}
1838
1839
1840static char center__doc__[] =
1841"S.center(width) -> string\n\
1842\n\
1843Return S centered in a string of length width. Padding is done\n\
1844using spaces.";
1845
1846static PyObject *
1847string_center(PyStringObject *self, PyObject *args)
1848{
1849 int marg, left;
1850 int width;
1851
1852 if (!PyArg_ParseTuple(args, "i:center", &width))
1853 return NULL;
1854
1855 if (PyString_GET_SIZE(self) >= width) {
1856 Py_INCREF(self);
1857 return (PyObject*) self;
1858 }
1859
1860 marg = width - PyString_GET_SIZE(self);
1861 left = marg / 2 + (marg & width & 1);
1862
1863 return pad(self, left, marg - left, ' ');
1864}
1865
#if 0
static char zfill__doc__[] =
    "S.zfill(width) -> string\n"
    "\n"
    "Pad a numeric string x with zeros on the left, to fill a field\n"
    "of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad self with '0' up to `width`.  If the
   original text starts with '+' or '-', that sign is moved in front of
   the inserted zeros.  Currently compiled out by the enclosing #if 0. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int nzeros;
    PyObject *padded;
    char *text;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    nzeros = width - PyString_GET_SIZE(self);

    padded = pad(self, nzeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* text[nzeros] is the first character of the original string */
    text = PyString_AS_STRING(padded);
    switch (text[nzeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        text[0] = text[nzeros];
        text[nzeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1905
1906static char isspace__doc__[] =
1907"S.isspace() -> int\n\
1908\n\
1909Return 1 if there are only whitespace characters in S,\n\
19100 otherwise.";
1911
1912static PyObject*
1913string_isspace(PyStringObject *self, PyObject *args)
1914{
1915 register const char *p = PyString_AS_STRING(self);
1916 register const char *e;
1917
1918 if (!PyArg_NoArgs(args))
1919 return NULL;
1920
1921 /* Shortcut for single character strings */
1922 if (PyString_GET_SIZE(self) == 1 &&
1923 isspace(*p))
1924 return PyInt_FromLong(1);
1925
1926 e = p + PyString_GET_SIZE(self);
1927 for (; p < e; p++) {
1928 if (!isspace(*p))
1929 return PyInt_FromLong(0);
1930 }
1931 return PyInt_FromLong(1);
1932}
1933
1934
1935static char isdigit__doc__[] =
1936"S.isdigit() -> int\n\
1937\n\
1938Return 1 if there are only digit characters in S,\n\
19390 otherwise.";
1940
1941static PyObject*
1942string_isdigit(PyStringObject *self, PyObject *args)
1943{
1944 register const char *p = PyString_AS_STRING(self);
1945 register const char *e;
1946
1947 if (!PyArg_NoArgs(args))
1948 return NULL;
1949
1950 /* Shortcut for single character strings */
1951 if (PyString_GET_SIZE(self) == 1 &&
1952 isdigit(*p))
1953 return PyInt_FromLong(1);
1954
1955 e = p + PyString_GET_SIZE(self);
1956 for (; p < e; p++) {
1957 if (!isdigit(*p))
1958 return PyInt_FromLong(0);
1959 }
1960 return PyInt_FromLong(1);
1961}
1962
1963
1964static char islower__doc__[] =
1965"S.islower() -> int\n\
1966\n\
1967Return 1 if all cased characters in S are lowercase and there is\n\
1968at least one cased character in S, 0 otherwise.";
1969
1970static PyObject*
1971string_islower(PyStringObject *self, PyObject *args)
1972{
1973 register const char *p = PyString_AS_STRING(self);
1974 register const char *e;
1975 int cased;
1976
1977 if (!PyArg_NoArgs(args))
1978 return NULL;
1979
1980 /* Shortcut for single character strings */
1981 if (PyString_GET_SIZE(self) == 1)
1982 return PyInt_FromLong(islower(*p) != 0);
1983
1984 e = p + PyString_GET_SIZE(self);
1985 cased = 0;
1986 for (; p < e; p++) {
1987 if (isupper(*p))
1988 return PyInt_FromLong(0);
1989 else if (!cased && islower(*p))
1990 cased = 1;
1991 }
1992 return PyInt_FromLong(cased);
1993}
1994
1995
1996static char isupper__doc__[] =
1997"S.isupper() -> int\n\
1998\n\
1999Return 1 if all cased characters in S are uppercase and there is\n\
2000at least one cased character in S, 0 otherwise.";
2001
2002static PyObject*
2003string_isupper(PyStringObject *self, PyObject *args)
2004{
2005 register const char *p = PyString_AS_STRING(self);
2006 register const char *e;
2007 int cased;
2008
2009 if (!PyArg_NoArgs(args))
2010 return NULL;
2011
2012 /* Shortcut for single character strings */
2013 if (PyString_GET_SIZE(self) == 1)
2014 return PyInt_FromLong(isupper(*p) != 0);
2015
2016 e = p + PyString_GET_SIZE(self);
2017 cased = 0;
2018 for (; p < e; p++) {
2019 if (islower(*p))
2020 return PyInt_FromLong(0);
2021 else if (!cased && isupper(*p))
2022 cased = 1;
2023 }
2024 return PyInt_FromLong(cased);
2025}
2026
2027
2028static char istitle__doc__[] =
2029"S.istitle() -> int\n\
2030\n\
2031Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2032may only follow uncased characters and lowercase characters only cased\n\
2033ones. Return 0 otherwise.";
2034
2035static PyObject*
2036string_istitle(PyStringObject *self, PyObject *args)
2037{
2038 register const char *p = PyString_AS_STRING(self);
2039 register const char *e;
2040 int cased, previous_is_cased;
2041
2042 if (!PyArg_NoArgs(args))
2043 return NULL;
2044
2045 /* Shortcut for single character strings */
2046 if (PyString_GET_SIZE(self) == 1)
2047 return PyInt_FromLong(isupper(*p) != 0);
2048
2049 e = p + PyString_GET_SIZE(self);
2050 cased = 0;
2051 previous_is_cased = 0;
2052 for (; p < e; p++) {
2053 register const char ch = *p;
2054
2055 if (isupper(ch)) {
2056 if (previous_is_cased)
2057 return PyInt_FromLong(0);
2058 previous_is_cased = 1;
2059 cased = 1;
2060 }
2061 else if (islower(ch)) {
2062 if (!previous_is_cased)
2063 return PyInt_FromLong(0);
2064 previous_is_cased = 1;
2065 cased = 1;
2066 }
2067 else
2068 previous_is_cased = 0;
2069 }
2070 return PyInt_FromLong(cased);
2071}
2072
2073
2074static char splitlines__doc__[] =
2075"S.splitlines([maxsplit]]) -> list of strings\n\
2076\n\
2077Return a list of the lines in S, breaking at line boundaries.\n\
2078If maxsplit is given, at most maxsplit are done. Line breaks are not\n\
2079included in the resulting list.";
2080
2081#define SPLIT_APPEND(data, left, right) \
2082 str = PyString_FromStringAndSize(data + left, right - left); \
2083 if (!str) \
2084 goto onError; \
2085 if (PyList_Append(list, str)) { \
2086 Py_DECREF(str); \
2087 goto onError; \
2088 } \
2089 else \
2090 Py_DECREF(str);
2091
2092static PyObject*
2093string_splitlines(PyStringObject *self, PyObject *args)
2094{
2095 int maxcount = -1;
2096 register int i;
2097 register int j;
2098 int len;
2099 PyObject *list;
2100 PyObject *str;
2101 char *data;
2102
2103 if (!PyArg_ParseTuple(args, "|i:splitlines", &maxcount))
2104 return NULL;
2105
2106 data = PyString_AS_STRING(self);
2107 len = PyString_GET_SIZE(self);
2108
2109 if (maxcount < 0)
2110 maxcount = INT_MAX;
2111
2112 list = PyList_New(0);
2113 if (!list)
2114 goto onError;
2115
2116 for (i = j = 0; i < len; ) {
2117 /* Find a line and append it */
2118 while (i < len && data[i] != '\n' && data[i] != '\r')
2119 i++;
2120 if (maxcount-- <= 0)
2121 break;
2122 SPLIT_APPEND(data, j, i);
2123
2124 /* Skip the line break reading CRLF as one line break */
2125 if (i < len) {
2126 if (data[i] == '\r' && i + 1 < len &&
2127 data[i+1] == '\n')
2128 i += 2;
2129 else
2130 i++;
2131 }
2132 j = i;
2133 }
2134 if (j < len) {
2135 SPLIT_APPEND(data, j, len);
2136 }
2137
2138 return list;
2139
2140 onError:
2141 Py_DECREF(list);
2142 return NULL;
2143}
2144
2145#undef SPLIT_APPEND
2146
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147
2148static PyMethodDef
2149string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150 /* Counterparts of the obsolete stropmodule functions; except
2151 string.maketrans(). */
2152 {"join", (PyCFunction)string_join, 1, join__doc__},
2153 {"split", (PyCFunction)string_split, 1, split__doc__},
2154 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2155 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2156 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2157 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2158 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2159 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2160 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2162 {"count", (PyCFunction)string_count, 1, count__doc__},
2163 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2164 {"find", (PyCFunction)string_find, 1, find__doc__},
2165 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2168 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2169 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2170 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2172 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2173 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2175 {"title", (PyCFunction)string_title, 1, title__doc__},
2176 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2177 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2178 {"center", (PyCFunction)string_center, 1, center__doc__},
2179 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2180 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2181#if 0
2182 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2183#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 {NULL, NULL} /* sentinel */
2185};
2186
2187static PyObject *
2188string_getattr(s, name)
2189 PyStringObject *s;
2190 char *name;
2191{
2192 return Py_FindMethod(string_methods, (PyObject*)s, name);
2193}
2194
2195
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002196PyTypeObject PyString_Type = {
2197 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002198 0,
2199 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002200 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002201 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002202 (destructor)string_dealloc, /*tp_dealloc*/
2203 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002205 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002206 (cmpfunc)string_compare, /*tp_compare*/
2207 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002208 0, /*tp_as_number*/
2209 &string_as_sequence, /*tp_as_sequence*/
2210 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002211 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002212 0, /*tp_call*/
2213 0, /*tp_str*/
2214 0, /*tp_getattro*/
2215 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002216 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002217 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002218 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002219};
2220
2221void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002222PyString_Concat(pv, w)
2223 register PyObject **pv;
2224 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002225{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002226 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002227 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002228 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002229 if (w == NULL || !PyString_Check(*pv)) {
2230 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002231 *pv = NULL;
2232 return;
2233 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002234 v = string_concat((PyStringObject *) *pv, w);
2235 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002236 *pv = v;
2237}
2238
Guido van Rossum013142a1994-08-30 08:19:36 +00002239void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002240PyString_ConcatAndDel(pv, w)
2241 register PyObject **pv;
2242 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002243{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002244 PyString_Concat(pv, w);
2245 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002246}
2247
2248
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002249/* The following function breaks the notion that strings are immutable:
2250 it changes the size of a string. We get away with this only if there
2251 is only one module referencing the object. You can also think of it
2252 as creating a new string object and destroying the old one, only
2253 more efficiently. In any case, don't use this if the string may
2254 already be known to some other part of the code... */
2255
2256int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002257_PyString_Resize(pv, newsize)
2258 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002259 int newsize;
2260{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002261 register PyObject *v;
2262 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002263 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002264 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002265 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002266 Py_DECREF(v);
2267 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002268 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002269 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002270 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002271#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002272 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002273#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002274 _Py_ForgetReference(v);
2275 *pv = (PyObject *)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002276 realloc((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002277 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002278 if (*pv == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002279 PyMem_DEL(v);
2280 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002281 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002282 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002283 _Py_NewReference(*pv);
2284 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002285 sv->ob_size = newsize;
2286 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002287 return 0;
2288}
Guido van Rossume5372401993-03-16 12:15:04 +00002289
2290/* Helpers for formatstring */
2291
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002292static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002293getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002294 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002295 int arglen;
2296 int *p_argidx;
2297{
2298 int argidx = *p_argidx;
2299 if (argidx < arglen) {
2300 (*p_argidx)++;
2301 if (arglen < 0)
2302 return args;
2303 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002304 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002305 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002306 PyErr_SetString(PyExc_TypeError,
2307 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002308 return NULL;
2309}
2310
/* Bit flags collected while parsing a format specifier; each is set by
   the flag character named on its line. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
2316
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002317static int
2318formatfloat(buf, flags, prec, type, v)
2319 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002320 int flags;
2321 int prec;
2322 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002323 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002324{
2325 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002326 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002327 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002328 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002329 if (prec < 0)
2330 prec = 6;
2331 if (prec > 50)
2332 prec = 50; /* Arbitrary limitation */
2333 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2334 type = 'g';
2335 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2336 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002337 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002338}
2339
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002340static int
2341formatint(buf, flags, prec, type, v)
2342 char *buf;
Guido van Rossume5372401993-03-16 12:15:04 +00002343 int flags;
2344 int prec;
2345 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002346 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002347{
2348 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002349 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002350 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002351 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002352 if (prec < 0)
2353 prec = 1;
2354 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2355 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002356 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002357}
2358
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002359static int
2360formatchar(buf, v)
2361 char *buf;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002362 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002363{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002364 if (PyString_Check(v)) {
2365 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002366 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002367 }
2368 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002369 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002370 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002371 }
2372 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002373 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002374}
2375
Guido van Rossum013142a1994-08-30 08:19:36 +00002376
Guido van Rossume5372401993-03-16 12:15:04 +00002377/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
2378
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002379PyObject *
2380PyString_Format(format, args)
2381 PyObject *format;
2382 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002383{
2384 char *fmt, *res;
2385 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002386 int args_owned = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002387 PyObject *result;
2388 PyObject *dict = NULL;
2389 if (format == NULL || !PyString_Check(format) || args == NULL) {
2390 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002391 return NULL;
2392 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002393 fmt = PyString_AsString(format);
2394 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002395 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002396 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002397 if (result == NULL)
2398 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002399 res = PyString_AsString(result);
2400 if (PyTuple_Check(args)) {
2401 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002402 argidx = 0;
2403 }
2404 else {
2405 arglen = -1;
2406 argidx = -2;
2407 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002408 if (args->ob_type->tp_as_mapping)
2409 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002410 while (--fmtcnt >= 0) {
2411 if (*fmt != '%') {
2412 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002413 rescnt = fmtcnt + 100;
2414 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002415 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002416 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002417 res = PyString_AsString(result)
2418 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002419 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002420 }
2421 *res++ = *fmt++;
2422 }
2423 else {
2424 /* Got a format specifier */
2425 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002426 int width = -1;
2427 int prec = -1;
2428 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002429 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002430 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002431 PyObject *v = NULL;
2432 PyObject *temp = NULL;
Guido van Rossume5372401993-03-16 12:15:04 +00002433 char *buf;
2434 int sign;
2435 int len;
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002436 char tmpbuf[120]; /* For format{float,int,char}() */
Guido van Rossumda9c2711996-12-05 21:58:58 +00002437 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002438 if (*fmt == '(') {
2439 char *keystart;
2440 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002441 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002442 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002443
2444 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002445 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002446 "format requires a mapping");
2447 goto error;
2448 }
2449 ++fmt;
2450 --fmtcnt;
2451 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002452 /* Skip over balanced parentheses */
2453 while (pcount > 0 && --fmtcnt >= 0) {
2454 if (*fmt == ')')
2455 --pcount;
2456 else if (*fmt == '(')
2457 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002458 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002459 }
2460 keylen = fmt - keystart - 1;
2461 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002462 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002463 "incomplete format key");
2464 goto error;
2465 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002466 key = PyString_FromStringAndSize(keystart,
2467 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002468 if (key == NULL)
2469 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002470 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002471 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002472 args_owned = 0;
2473 }
2474 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002475 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002476 if (args == NULL) {
2477 goto error;
2478 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002479 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002480 arglen = -1;
2481 argidx = -2;
2482 }
Guido van Rossume5372401993-03-16 12:15:04 +00002483 while (--fmtcnt >= 0) {
2484 switch (c = *fmt++) {
2485 case '-': flags |= F_LJUST; continue;
2486 case '+': flags |= F_SIGN; continue;
2487 case ' ': flags |= F_BLANK; continue;
2488 case '#': flags |= F_ALT; continue;
2489 case '0': flags |= F_ZERO; continue;
2490 }
2491 break;
2492 }
2493 if (c == '*') {
2494 v = getnextarg(args, arglen, &argidx);
2495 if (v == NULL)
2496 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002497 if (!PyInt_Check(v)) {
2498 PyErr_SetString(PyExc_TypeError,
2499 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002500 goto error;
2501 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002502 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002503 if (width < 0) {
2504 flags |= F_LJUST;
2505 width = -width;
2506 }
Guido van Rossume5372401993-03-16 12:15:04 +00002507 if (--fmtcnt >= 0)
2508 c = *fmt++;
2509 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002510 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002511 width = c - '0';
2512 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002513 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002514 if (!isdigit(c))
2515 break;
2516 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002517 PyErr_SetString(
2518 PyExc_ValueError,
2519 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002520 goto error;
2521 }
2522 width = width*10 + (c - '0');
2523 }
2524 }
2525 if (c == '.') {
2526 prec = 0;
2527 if (--fmtcnt >= 0)
2528 c = *fmt++;
2529 if (c == '*') {
2530 v = getnextarg(args, arglen, &argidx);
2531 if (v == NULL)
2532 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002533 if (!PyInt_Check(v)) {
2534 PyErr_SetString(
2535 PyExc_TypeError,
2536 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002537 goto error;
2538 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002539 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002540 if (prec < 0)
2541 prec = 0;
2542 if (--fmtcnt >= 0)
2543 c = *fmt++;
2544 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002545 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002546 prec = c - '0';
2547 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002548 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002549 if (!isdigit(c))
2550 break;
2551 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002552 PyErr_SetString(
2553 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002554 "prec too big");
2555 goto error;
2556 }
2557 prec = prec*10 + (c - '0');
2558 }
2559 }
2560 } /* prec */
2561 if (fmtcnt >= 0) {
2562 if (c == 'h' || c == 'l' || c == 'L') {
2563 size = c;
2564 if (--fmtcnt >= 0)
2565 c = *fmt++;
2566 }
2567 }
2568 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002569 PyErr_SetString(PyExc_ValueError,
2570 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002571 goto error;
2572 }
2573 if (c != '%') {
2574 v = getnextarg(args, arglen, &argidx);
2575 if (v == NULL)
2576 goto error;
2577 }
2578 sign = 0;
2579 fill = ' ';
2580 switch (c) {
2581 case '%':
2582 buf = "%";
2583 len = 1;
2584 break;
2585 case 's':
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002586 temp = PyObject_Str(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002587 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002588 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002589 if (!PyString_Check(temp)) {
2590 PyErr_SetString(PyExc_TypeError,
2591 "%s argument has non-string str()");
2592 goto error;
2593 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002594 buf = PyString_AsString(temp);
2595 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002596 if (prec >= 0 && len > prec)
2597 len = prec;
2598 break;
2599 case 'i':
2600 case 'd':
2601 case 'u':
2602 case 'o':
2603 case 'x':
2604 case 'X':
2605 if (c == 'i')
2606 c = 'd';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002607 buf = tmpbuf;
2608 len = formatint(buf, flags, prec, c, v);
2609 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002610 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002611 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002612 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002613 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002614 if ((flags&F_ALT) &&
2615 (c == 'x' || c == 'X') &&
2616 buf[0] == '0' && buf[1] == c) {
2617 *res++ = *buf++;
2618 *res++ = *buf++;
2619 rescnt -= 2;
2620 len -= 2;
2621 width -= 2;
2622 if (width < 0)
2623 width = 0;
2624 }
2625 }
Guido van Rossume5372401993-03-16 12:15:04 +00002626 break;
2627 case 'e':
2628 case 'E':
2629 case 'f':
2630 case 'g':
2631 case 'G':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002632 buf = tmpbuf;
2633 len = formatfloat(buf, flags, prec, c, v);
2634 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002635 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002636 sign = 1;
2637 if (flags&F_ZERO)
2638 fill = '0';
2639 break;
2640 case 'c':
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002641 buf = tmpbuf;
2642 len = formatchar(buf, v);
2643 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002644 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002645 break;
2646 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002647 PyErr_Format(PyExc_ValueError,
2648 "unsupported format character '%c' (0x%x)",
2649 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002650 goto error;
2651 }
2652 if (sign) {
2653 if (*buf == '-' || *buf == '+') {
2654 sign = *buf++;
2655 len--;
2656 }
2657 else if (flags & F_SIGN)
2658 sign = '+';
2659 else if (flags & F_BLANK)
2660 sign = ' ';
2661 else
2662 sign = '\0';
2663 }
2664 if (width < len)
2665 width = len;
2666 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002667 reslen -= rescnt;
2668 rescnt = width + fmtcnt + 100;
2669 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002670 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002671 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002672 res = PyString_AsString(result)
2673 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002674 }
2675 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002676 if (fill != ' ')
2677 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002678 rescnt--;
2679 if (width > len)
2680 width--;
2681 }
2682 if (width > len && !(flags&F_LJUST)) {
2683 do {
2684 --rescnt;
2685 *res++ = fill;
2686 } while (--width > len);
2687 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002688 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002689 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002690 memcpy(res, buf, len);
2691 res += len;
2692 rescnt -= len;
2693 while (--width >= len) {
2694 --rescnt;
2695 *res++ = ' ';
2696 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002697 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002698 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002699 "not all arguments converted");
2700 goto error;
2701 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002702 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002703 } /* '%' */
2704 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002705 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002706 PyErr_SetString(PyExc_TypeError,
2707 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002708 goto error;
2709 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002710 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002711 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002712 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002713 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002714 return result;
2715 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002716 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002717 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002718 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002719 }
Guido van Rossume5372401993-03-16 12:15:04 +00002720 return NULL;
2721}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002722
2723
2724#ifdef INTERN_STRINGS
2725
2726static PyObject *interned;
2727
2728void
2729PyString_InternInPlace(p)
2730 PyObject **p;
2731{
2732 register PyStringObject *s = (PyStringObject *)(*p);
2733 PyObject *t;
2734 if (s == NULL || !PyString_Check(s))
2735 Py_FatalError("PyString_InternInPlace: strings only please!");
2736 if ((t = s->ob_sinterned) != NULL) {
2737 if (t == (PyObject *)s)
2738 return;
2739 Py_INCREF(t);
2740 *p = t;
2741 Py_DECREF(s);
2742 return;
2743 }
2744 if (interned == NULL) {
2745 interned = PyDict_New();
2746 if (interned == NULL)
2747 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002748 }
2749 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2750 Py_INCREF(t);
2751 *p = s->ob_sinterned = t;
2752 Py_DECREF(s);
2753 return;
2754 }
2755 t = (PyObject *)s;
2756 if (PyDict_SetItem(interned, t, t) == 0) {
2757 s->ob_sinterned = t;
2758 return;
2759 }
2760 PyErr_Clear();
2761}
2762
2763
2764PyObject *
2765PyString_InternFromString(cp)
2766 const char *cp;
2767{
2768 PyObject *s = PyString_FromString(cp);
2769 if (s == NULL)
2770 return NULL;
2771 PyString_InternInPlace(&s);
2772 return s;
2773}
2774
2775#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002776
2777void
2778PyString_Fini()
2779{
2780 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002781 for (i = 0; i < UCHAR_MAX + 1; i++) {
2782 Py_XDECREF(characters[i]);
2783 characters[i] = NULL;
2784 }
2785#ifndef DONT_SHARE_SHORT_STRINGS
2786 Py_XDECREF(nullstring);
2787 nullstring = NULL;
2788#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002789#ifdef INTERN_STRINGS
2790 if (interned) {
2791 int pos, changed;
2792 PyObject *key, *value;
2793 do {
2794 changed = 0;
2795 pos = 0;
2796 while (PyDict_Next(interned, &pos, &key, &value)) {
2797 if (key->ob_refcnt == 2 && key == value) {
2798 PyDict_DelItem(interned, key);
2799 changed = 1;
2800 }
2801 }
2802 } while (changed);
2803 }
2804#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002805}