blob: e918829b72af320b34d778d7b9a2a5c25a691ad4 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* String object implementation */
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000014
Guido van Rossum71160aa1997-06-03 18:03:18 +000015#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000016#include <ctype.h>
17
/* Statistics kept only when COUNT_ALLOCS is defined: null_strings is bumped
   each time the shared empty string is handed out again, one_strings each
   time a cached one-character string is handed out again. */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000018#ifdef COUNT_ALLOCS
19int null_strings, one_strings;
20#endif
21
Guido van Rossum03093a21994-09-28 15:51:32 +000022#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000023#include <limits.h>
24#else
25#ifndef UCHAR_MAX
26#define UCHAR_MAX 255
27#endif
28#endif
29
/* Cache of one-character string objects, indexed by the character's value
   masked with UCHAR_MAX; an entry is NULL until that character has been
   created once. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000030static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000031#ifndef DONT_SHARE_SHORT_STRINGS
/* The single shared empty string object; NULL until the first empty string
   has been created. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000032static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000033#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000034
35/*
36 Newsizedstringobject() and newstringobject() try in certain cases
37 to share string objects. When the size of the string is zero,
38 these routines always return a pointer to the same string object;
39 when the size is one, they return a pointer to an already existing
40 object if the contents of the string is known. For
41 newstringobject() this is always the case, for
42 newsizedstringobject() this is the case when the first argument in
43 not NULL.
44 A common practice to allocate a string and then fill it in or
45 change it must be done carefully. It is only allowed to change the
46 contents of the string if the obect was gotten from
47 newsizedstringobject() with a NULL first argument, because in the
48 future these routines may try to do even more sharing of objects.
49*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000050PyObject *
51PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000052 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053 int size;
54{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000056#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000073#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
75 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000078 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000079 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000080 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081#ifdef CACHE_HASH
82 op->ob_shash = -1;
83#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000084#ifdef INTERN_STRINGS
85 op->ob_sinterned = NULL;
86#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (str != NULL)
88 memcpy(op->ob_sval, str, size);
89 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000090#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 if (size == 0) {
92 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
95 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000098#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
103PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000104 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000106 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000113#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000128#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
130 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000133 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000135 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136#ifdef CACHE_HASH
137 op->ob_shash = -1;
138#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000139#ifdef INTERN_STRINGS
140 op->ob_sinterned = NULL;
141#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000142 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000143#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
145 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 } else if (size == 1) {
148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000151#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Guido van Rossum234f9421993-06-17 12:35:49 +0000155static void
Guido van Rossume5372401993-03-16 12:15:04 +0000156string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000157 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000158{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000159 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000160}
161
Guido van Rossumd7047b31995-01-02 19:07:15 +0000162int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163PyString_Size(op)
164 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000165{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000166 if (!PyString_Check(op)) {
167 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000168 return -1;
169 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000170 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000171}
172
173/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000174PyString_AsString(op)
175 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000176{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000177 if (!PyString_Check(op)) {
178 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 return NULL;
180 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000181 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000182}
183
184/* Methods */
185
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000186static int
Guido van Rossume5372401993-03-16 12:15:04 +0000187string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000188 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000189 FILE *fp;
190 int flags;
191{
192 int i;
193 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000194 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000195 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000196 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000197 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000198 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000199 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000200
201 /* figure out which quote to use; single is prefered */
202 quote = '\'';
203 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
204 quote = '"';
205
206 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000207 for (i = 0; i < op->ob_size; i++) {
208 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000209 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 fprintf(fp, "\\%c", c);
211 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000212 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000214 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000217 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000218}
219
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000220static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000221string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000222 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000224 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
225 PyObject *v;
226 if (newsize > INT_MAX) {
227 PyErr_SetString(PyExc_OverflowError,
228 "string is too large to make repr");
229 }
230 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000232 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000233 }
234 else {
235 register int i;
236 register char c;
237 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000238 int quote;
239
240 /* figure out which quote to use; single is prefered */
241 quote = '\'';
242 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
243 quote = '"';
244
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000245 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000246 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000247 for (i = 0; i < op->ob_size; i++) {
248 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000249 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000250 *p++ = '\\', *p++ = c;
251 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000252 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000253 while (*p != '\0')
254 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000255 }
256 else
257 *p++ = c;
258 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000259 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000261 _PyString_Resize(
262 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000263 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265}
266
267static int
Guido van Rossume5372401993-03-16 12:15:04 +0000268string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000269 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000270{
271 return a->ob_size;
272}
273
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000274static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000275string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000276 register PyStringObject *a;
277 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278{
279 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000280 register PyStringObject *op;
281 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000282 if (PyUnicode_Check(bb))
283 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000284 PyErr_Format(PyExc_TypeError,
285 "cannot add type \"%.200s\" to string",
286 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000287 return NULL;
288 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000289#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290 /* Optimize cases with empty left or right operand */
291 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000293 return bb;
294 }
295 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000296 Py_INCREF(a);
297 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 }
299 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000300 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000301 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000302 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000303 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000304 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000305 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000306#ifdef CACHE_HASH
307 op->ob_shash = -1;
308#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000309#ifdef INTERN_STRINGS
310 op->ob_sinterned = NULL;
311#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000312 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
313 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
314 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000315 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000316#undef b
317}
318
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000319static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000320string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000321 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322 register int n;
323{
324 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000325 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000326 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000327 if (n < 0)
328 n = 0;
329 size = a->ob_size * n;
330 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000331 Py_INCREF(a);
332 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000334 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000335 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000336 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000337 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000338 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000339 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000340#ifdef CACHE_HASH
341 op->ob_shash = -1;
342#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000343#ifdef INTERN_STRINGS
344 op->ob_sinterned = NULL;
345#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000346 for (i = 0; i < size; i += a->ob_size)
347 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
348 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000349 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000350}
351
352/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
353
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000354static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000355string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000356 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000357 register int i, j; /* May be negative! */
358{
359 if (i < 0)
360 i = 0;
361 if (j < 0)
362 j = 0; /* Avoid signed/unsigned bug in next line */
363 if (j > a->ob_size)
364 j = a->ob_size;
365 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366 Py_INCREF(a);
367 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 }
369 if (j < i)
370 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000371 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000372}
373
Guido van Rossum9284a572000-03-07 15:53:43 +0000374static int
375string_contains(a, el)
376PyObject *a, *el;
377{
378 register char *s, *end;
379 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000380 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000381 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000382 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000383 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000384 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000385 return -1;
386 }
387 c = PyString_AsString(el)[0];
388 s = PyString_AsString(a);
389 end = s + PyString_Size(a);
390 while (s < end) {
391 if (c == *s++)
392 return 1;
393 }
394 return 0;
395}
396
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000397static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000398string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000399 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000400 register int i;
401{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000402 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000403 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000404 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000405 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406 return NULL;
407 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000408 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000410#ifdef COUNT_ALLOCS
411 if (v != NULL)
412 one_strings++;
413#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000414 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000416 if (v == NULL)
417 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000418 characters[c] = (PyStringObject *) v;
419 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000420 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000422 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423}
424
425static int
Guido van Rossume5372401993-03-16 12:15:04 +0000426string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000428{
Guido van Rossum253919f1991-02-13 23:18:39 +0000429 int len_a = a->ob_size, len_b = b->ob_size;
430 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000431 int cmp;
432 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000433 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000434 if (cmp == 0)
435 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
436 if (cmp != 0)
437 return cmp;
438 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000439 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440}
441
Guido van Rossum9bfef441993-03-29 10:43:31 +0000442static long
443string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000444 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000445{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000446 register int len;
447 register unsigned char *p;
448 register long x;
449
450#ifdef CACHE_HASH
451 if (a->ob_shash != -1)
452 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000453#ifdef INTERN_STRINGS
454 if (a->ob_sinterned != NULL)
455 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000456 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000457#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000458#endif
459 len = a->ob_size;
460 p = (unsigned char *) a->ob_sval;
461 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000462 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000463 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000464 x ^= a->ob_size;
465 if (x == -1)
466 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000467#ifdef CACHE_HASH
468 a->ob_shash = x;
469#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000470 return x;
471}
472
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000473static int
474string_buffer_getreadbuf(self, index, ptr)
475 PyStringObject *self;
476 int index;
477 const void **ptr;
478{
479 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000480 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000481 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000482 return -1;
483 }
484 *ptr = (void *)self->ob_sval;
485 return self->ob_size;
486}
487
488static int
489string_buffer_getwritebuf(self, index, ptr)
490 PyStringObject *self;
491 int index;
492 const void **ptr;
493{
Guido van Rossum045e6881997-09-08 18:30:11 +0000494 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000495 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000496 return -1;
497}
498
499static int
500string_buffer_getsegcount(self, lenp)
501 PyStringObject *self;
502 int *lenp;
503{
504 if ( lenp )
505 *lenp = self->ob_size;
506 return 1;
507}
508
Guido van Rossum1db70701998-10-08 02:18:52 +0000509static int
510string_buffer_getcharbuf(self, index, ptr)
511 PyStringObject *self;
512 int index;
513 const char **ptr;
514{
515 if ( index != 0 ) {
516 PyErr_SetString(PyExc_SystemError,
517 "accessing non-existent string segment");
518 return -1;
519 }
520 *ptr = self->ob_sval;
521 return self->ob_size;
522}
523
/* Sequence-protocol slot table for strings.  The two assignment slots are
   0: no item or slice assignment handler is provided. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000524static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000525 (inquiry)string_length, /*sq_length*/
526 (binaryfunc)string_concat, /*sq_concat*/
527 (intargfunc)string_repeat, /*sq_repeat*/
528 (intargfunc)string_item, /*sq_item*/
529 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000530 0, /*sq_ass_item*/
531 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000532 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533};
534
/* Buffer-protocol slot table for strings, in order: read buffer, write
   buffer (always fails with TypeError), segment count, character buffer. */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000535static PyBufferProcs string_as_buffer = {
536 (getreadbufferproc)string_buffer_getreadbuf,
537 (getwritebufferproc)string_buffer_getwritebuf,
538 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000539 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000540};
541
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000542
543
/* Mode selectors; NOTE(review): presumably consumed by the strip routines
   further down the file (not visible here) -- confirm. */
544#define LEFTSTRIP 0
545#define RIGHTSTRIP 1
546#define BOTHSTRIP 2
547
548
549static PyObject *
550split_whitespace(s, len, maxsplit)
551 char *s;
552 int len;
553 int maxsplit;
554{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000555 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000556 PyObject* item;
557 PyObject *list = PyList_New(0);
558
559 if (list == NULL)
560 return NULL;
561
Guido van Rossum4c08d552000-03-10 22:55:18 +0000562 for (i = j = 0; i < len; ) {
563 while (i < len && isspace(Py_CHARMASK(s[i])))
564 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000565 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000566 while (i < len && !isspace(Py_CHARMASK(s[i])))
567 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000568 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000569 if (maxsplit-- <= 0)
570 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000571 item = PyString_FromStringAndSize(s+j, (int)(i-j));
572 if (item == NULL)
573 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000574 err = PyList_Append(list, item);
575 Py_DECREF(item);
576 if (err < 0)
577 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000578 while (i < len && isspace(Py_CHARMASK(s[i])))
579 i++;
580 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000581 }
582 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000583 if (j < len) {
584 item = PyString_FromStringAndSize(s+j, (int)(len - j));
585 if (item == NULL)
586 goto finally;
587 err = PyList_Append(list, item);
588 Py_DECREF(item);
589 if (err < 0)
590 goto finally;
591 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000592 return list;
593 finally:
594 Py_DECREF(list);
595 return NULL;
596}
597
598
/* Help text describing S.split(). */
599static char split__doc__[] =
600"S.split([sep [,maxsplit]]) -> list of strings\n\
601\n\
602Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000603delimiter string. If maxsplit is given, at most maxsplit\n\
604splits are done. If sep is not specified, any whitespace string\n\
605is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000606
607static PyObject *
608string_split(self, args)
609 PyStringObject *self;
610 PyObject *args;
611{
612 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000613 int maxsplit = -1;
614 const char *s = PyString_AS_STRING(self), *sub;
615 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000616
Guido van Rossum4c08d552000-03-10 22:55:18 +0000617 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000618 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000619 if (maxsplit < 0)
620 maxsplit = INT_MAX;
621 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000622 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000623 if (PyString_Check(subobj)) {
624 sub = PyString_AS_STRING(subobj);
625 n = PyString_GET_SIZE(subobj);
626 }
627 else if (PyUnicode_Check(subobj))
628 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
629 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
630 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000631 if (n == 0) {
632 PyErr_SetString(PyExc_ValueError, "empty separator");
633 return NULL;
634 }
635
636 list = PyList_New(0);
637 if (list == NULL)
638 return NULL;
639
640 i = j = 0;
641 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000642 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000643 if (maxsplit-- <= 0)
644 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000645 item = PyString_FromStringAndSize(s+j, (int)(i-j));
646 if (item == NULL)
647 goto fail;
648 err = PyList_Append(list, item);
649 Py_DECREF(item);
650 if (err < 0)
651 goto fail;
652 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000653 }
654 else
655 i++;
656 }
657 item = PyString_FromStringAndSize(s+j, (int)(len-j));
658 if (item == NULL)
659 goto fail;
660 err = PyList_Append(list, item);
661 Py_DECREF(item);
662 if (err < 0)
663 goto fail;
664
665 return list;
666
667 fail:
668 Py_DECREF(list);
669 return NULL;
670}
671
672
/* Help text describing S.join(). */
673static char join__doc__[] =
674"S.join(sequence) -> string\n\
675\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000676Return a string which is the concatenation of the strings in the\n\
677sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000678
679static PyObject *
680string_join(self, args)
681 PyStringObject *self;
682 PyObject *args;
683{
684 char *sep = PyString_AS_STRING(self);
685 int seplen = PyString_GET_SIZE(self);
686 PyObject *res = NULL;
687 int reslen = 0;
688 char *p;
689 int seqlen = 0;
690 int sz = 100;
691 int i, slen;
692 PyObject *seq;
693
Guido van Rossum43713e52000-02-29 13:59:29 +0000694 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000695 return NULL;
696
697 seqlen = PySequence_Length(seq);
698 if (seqlen < 0 && PyErr_Occurred())
699 return NULL;
700
701 if (seqlen == 1) {
702 /* Optimization if there's only one item */
703 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000704 if (item == NULL)
705 return NULL;
706 if (!PyString_Check(item) &&
707 !PyUnicode_Check(item)) {
708 PyErr_SetString(PyExc_TypeError,
709 "first argument must be sequence of strings");
710 Py_DECREF(item);
711 return NULL;
712 }
713 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000714 }
715 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
716 return NULL;
717 p = PyString_AsString(res);
718
719 /* optimize for lists. all others (tuples and arbitrary sequences)
720 * just use the abstract interface.
721 */
722 if (PyList_Check(seq)) {
723 for (i = 0; i < seqlen; i++) {
724 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000725 if (!PyString_Check(item)){
726 if (PyUnicode_Check(item)) {
727 Py_DECREF(res);
728 return PyUnicode_Join(
729 (PyObject *)self,
730 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000731 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000732 PyErr_Format(PyExc_TypeError,
733 "sequence item %i not a string",
734 i);
735 goto finally;
736 }
737 slen = PyString_GET_SIZE(item);
738 while (reslen + slen + seplen >= sz) {
739 if (_PyString_Resize(&res, sz*2))
740 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000741 sz *= 2;
742 p = PyString_AsString(res) + reslen;
743 }
744 if (i > 0) {
745 memcpy(p, sep, seplen);
746 p += seplen;
747 reslen += seplen;
748 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000749 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000750 p += slen;
751 reslen += slen;
752 }
753 }
754 else {
755 for (i = 0; i < seqlen; i++) {
756 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000757 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000758 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000759 if (!PyString_Check(item)){
760 if (PyUnicode_Check(item)) {
761 Py_DECREF(res);
762 Py_DECREF(item);
763 return PyUnicode_Join(
764 (PyObject *)self,
765 seq);
766 }
767 Py_DECREF(item);
768 PyErr_Format(PyExc_TypeError,
769 "sequence item %i not a string",
770 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000771 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000772 }
773 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000774 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000775 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000776 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000777 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000778 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000779 sz *= 2;
780 p = PyString_AsString(res) + reslen;
781 }
782 if (i > 0) {
783 memcpy(p, sep, seplen);
784 p += seplen;
785 reslen += seplen;
786 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000787 memcpy(p, PyString_AS_STRING(item), slen);
788 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 p += slen;
790 reslen += slen;
791 }
792 }
793 if (_PyString_Resize(&res, reslen))
794 goto finally;
795 return res;
796
797 finally:
798 Py_DECREF(res);
799 return NULL;
800}
801
802
803
804static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000805string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000806 PyStringObject *self;
807 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000808 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000809{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000810 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000811 int len = PyString_GET_SIZE(self);
812 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000813 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000814
Guido van Rossumc6821402000-05-08 14:08:05 +0000815 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
816 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817 return -2;
818 if (PyString_Check(subobj)) {
819 sub = PyString_AS_STRING(subobj);
820 n = PyString_GET_SIZE(subobj);
821 }
822 else if (PyUnicode_Check(subobj))
823 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
824 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 return -2;
826
827 if (last > len)
828 last = len;
829 if (last < 0)
830 last += len;
831 if (last < 0)
832 last = 0;
833 if (i < 0)
834 i += len;
835 if (i < 0)
836 i = 0;
837
Guido van Rossum4c08d552000-03-10 22:55:18 +0000838 if (dir > 0) {
839 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000840 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000841 last -= n;
842 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000843 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000844 return (long)i;
845 }
846 else {
847 int j;
848
849 if (n == 0 && i <= last)
850 return (long)last;
851 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000852 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000853 return (long)j;
854 }
855
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000856 return -1;
857}
858
859
860static char find__doc__[] =
861"S.find(sub [,start [,end]]) -> int\n\
862\n\
863Return the lowest index in S where substring sub is found,\n\
864such that sub is contained within s[start,end]. Optional\n\
865arguments start and end are interpreted as in slice notation.\n\
866\n\
867Return -1 on failure.";
868
869static PyObject *
870string_find(self, args)
871 PyStringObject *self;
872 PyObject *args;
873{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000874 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875 if (result == -2)
876 return NULL;
877 return PyInt_FromLong(result);
878}
879
880
881static char index__doc__[] =
882"S.index(sub [,start [,end]]) -> int\n\
883\n\
884Like S.find() but raise ValueError when the substring is not found.";
885
886static PyObject *
887string_index(self, args)
888 PyStringObject *self;
889 PyObject *args;
890{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000892 if (result == -2)
893 return NULL;
894 if (result == -1) {
895 PyErr_SetString(PyExc_ValueError,
896 "substring not found in string.index");
897 return NULL;
898 }
899 return PyInt_FromLong(result);
900}
901
902
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000903static char rfind__doc__[] =
904"S.rfind(sub [,start [,end]]) -> int\n\
905\n\
906Return the highest index in S where substring sub is found,\n\
907such that sub is contained within s[start,end]. Optional\n\
908arguments start and end are interpreted as in slice notation.\n\
909\n\
910Return -1 on failure.";
911
912static PyObject *
913string_rfind(self, args)
914 PyStringObject *self;
915 PyObject *args;
916{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000917 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000918 if (result == -2)
919 return NULL;
920 return PyInt_FromLong(result);
921}
922
923
924static char rindex__doc__[] =
925"S.rindex(sub [,start [,end]]) -> int\n\
926\n\
927Like S.rfind() but raise ValueError when the substring is not found.";
928
929static PyObject *
930string_rindex(self, args)
931 PyStringObject *self;
932 PyObject *args;
933{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000934 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000935 if (result == -2)
936 return NULL;
937 if (result == -1) {
938 PyErr_SetString(PyExc_ValueError,
939 "substring not found in string.rindex");
940 return NULL;
941 }
942 return PyInt_FromLong(result);
943}
944
945
946static PyObject *
947do_strip(self, args, striptype)
948 PyStringObject *self;
949 PyObject *args;
950 int striptype;
951{
952 char *s = PyString_AS_STRING(self);
953 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000954
Guido van Rossum43713e52000-02-29 13:59:29 +0000955 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000956 return NULL;
957
958 i = 0;
959 if (striptype != RIGHTSTRIP) {
960 while (i < len && isspace(Py_CHARMASK(s[i]))) {
961 i++;
962 }
963 }
964
965 j = len;
966 if (striptype != LEFTSTRIP) {
967 do {
968 j--;
969 } while (j >= i && isspace(Py_CHARMASK(s[j])));
970 j++;
971 }
972
973 if (i == 0 && j == len) {
974 Py_INCREF(self);
975 return (PyObject*)self;
976 }
977 else
978 return PyString_FromStringAndSize(s+i, j-i);
979}
980
981
982static char strip__doc__[] =
983"S.strip() -> string\n\
984\n\
985Return a copy of the string S with leading and trailing\n\
986whitespace removed.";
987
988static PyObject *
989string_strip(self, args)
990 PyStringObject *self;
991 PyObject *args;
992{
993 return do_strip(self, args, BOTHSTRIP);
994}
995
996
997static char lstrip__doc__[] =
998"S.lstrip() -> string\n\
999\n\
1000Return a copy of the string S with leading whitespace removed.";
1001
1002static PyObject *
1003string_lstrip(self, args)
1004 PyStringObject *self;
1005 PyObject *args;
1006{
1007 return do_strip(self, args, LEFTSTRIP);
1008}
1009
1010
1011static char rstrip__doc__[] =
1012"S.rstrip() -> string\n\
1013\n\
1014Return a copy of the string S with trailing whitespace removed.";
1015
1016static PyObject *
1017string_rstrip(self, args)
1018 PyStringObject *self;
1019 PyObject *args;
1020{
1021 return do_strip(self, args, RIGHTSTRIP);
1022}
1023
1024
1025static char lower__doc__[] =
1026"S.lower() -> string\n\
1027\n\
1028Return a copy of the string S converted to lowercase.";
1029
1030static PyObject *
1031string_lower(self, args)
1032 PyStringObject *self;
1033 PyObject *args;
1034{
1035 char *s = PyString_AS_STRING(self), *s_new;
1036 int i, n = PyString_GET_SIZE(self);
1037 PyObject *new;
1038
Guido van Rossum43713e52000-02-29 13:59:29 +00001039 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001040 return NULL;
1041 new = PyString_FromStringAndSize(NULL, n);
1042 if (new == NULL)
1043 return NULL;
1044 s_new = PyString_AsString(new);
1045 for (i = 0; i < n; i++) {
1046 int c = Py_CHARMASK(*s++);
1047 if (isupper(c)) {
1048 *s_new = tolower(c);
1049 } else
1050 *s_new = c;
1051 s_new++;
1052 }
1053 return new;
1054}
1055
1056
1057static char upper__doc__[] =
1058"S.upper() -> string\n\
1059\n\
1060Return a copy of the string S converted to uppercase.";
1061
1062static PyObject *
1063string_upper(self, args)
1064 PyStringObject *self;
1065 PyObject *args;
1066{
1067 char *s = PyString_AS_STRING(self), *s_new;
1068 int i, n = PyString_GET_SIZE(self);
1069 PyObject *new;
1070
Guido van Rossum43713e52000-02-29 13:59:29 +00001071 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001072 return NULL;
1073 new = PyString_FromStringAndSize(NULL, n);
1074 if (new == NULL)
1075 return NULL;
1076 s_new = PyString_AsString(new);
1077 for (i = 0; i < n; i++) {
1078 int c = Py_CHARMASK(*s++);
1079 if (islower(c)) {
1080 *s_new = toupper(c);
1081 } else
1082 *s_new = c;
1083 s_new++;
1084 }
1085 return new;
1086}
1087
1088
Guido van Rossum4c08d552000-03-10 22:55:18 +00001089static char title__doc__[] =
1090"S.title() -> string\n\
1091\n\
1092Return a titlecased version of S, i.e. words start with uppercase\n\
1093characters, all remaining cased characters have lowercase.";
1094
1095static PyObject*
1096string_title(PyUnicodeObject *self, PyObject *args)
1097{
1098 char *s = PyString_AS_STRING(self), *s_new;
1099 int i, n = PyString_GET_SIZE(self);
1100 int previous_is_cased = 0;
1101 PyObject *new;
1102
1103 if (!PyArg_ParseTuple(args, ":title"))
1104 return NULL;
1105 new = PyString_FromStringAndSize(NULL, n);
1106 if (new == NULL)
1107 return NULL;
1108 s_new = PyString_AsString(new);
1109 for (i = 0; i < n; i++) {
1110 int c = Py_CHARMASK(*s++);
1111 if (islower(c)) {
1112 if (!previous_is_cased)
1113 c = toupper(c);
1114 previous_is_cased = 1;
1115 } else if (isupper(c)) {
1116 if (previous_is_cased)
1117 c = tolower(c);
1118 previous_is_cased = 1;
1119 } else
1120 previous_is_cased = 0;
1121 *s_new++ = c;
1122 }
1123 return new;
1124}
1125
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126static char capitalize__doc__[] =
1127"S.capitalize() -> string\n\
1128\n\
1129Return a copy of the string S with only its first character\n\
1130capitalized.";
1131
1132static PyObject *
1133string_capitalize(self, args)
1134 PyStringObject *self;
1135 PyObject *args;
1136{
1137 char *s = PyString_AS_STRING(self), *s_new;
1138 int i, n = PyString_GET_SIZE(self);
1139 PyObject *new;
1140
Guido van Rossum43713e52000-02-29 13:59:29 +00001141 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001142 return NULL;
1143 new = PyString_FromStringAndSize(NULL, n);
1144 if (new == NULL)
1145 return NULL;
1146 s_new = PyString_AsString(new);
1147 if (0 < n) {
1148 int c = Py_CHARMASK(*s++);
1149 if (islower(c))
1150 *s_new = toupper(c);
1151 else
1152 *s_new = c;
1153 s_new++;
1154 }
1155 for (i = 1; i < n; i++) {
1156 int c = Py_CHARMASK(*s++);
1157 if (isupper(c))
1158 *s_new = tolower(c);
1159 else
1160 *s_new = c;
1161 s_new++;
1162 }
1163 return new;
1164}
1165
1166
1167static char count__doc__[] =
1168"S.count(sub[, start[, end]]) -> int\n\
1169\n\
1170Return the number of occurrences of substring sub in string\n\
1171S[start:end]. Optional arguments start and end are\n\
1172interpreted as in slice notation.";
1173
1174static PyObject *
1175string_count(self, args)
1176 PyStringObject *self;
1177 PyObject *args;
1178{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001179 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180 int len = PyString_GET_SIZE(self), n;
1181 int i = 0, last = INT_MAX;
1182 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001183 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001184
Guido van Rossumc6821402000-05-08 14:08:05 +00001185 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1186 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001187 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001188
Guido van Rossum4c08d552000-03-10 22:55:18 +00001189 if (PyString_Check(subobj)) {
1190 sub = PyString_AS_STRING(subobj);
1191 n = PyString_GET_SIZE(subobj);
1192 }
1193 else if (PyUnicode_Check(subobj))
1194 return PyInt_FromLong(
1195 PyUnicode_Count((PyObject *)self, subobj, i, last));
1196 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1197 return NULL;
1198
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001199 if (last > len)
1200 last = len;
1201 if (last < 0)
1202 last += len;
1203 if (last < 0)
1204 last = 0;
1205 if (i < 0)
1206 i += len;
1207 if (i < 0)
1208 i = 0;
1209 m = last + 1 - n;
1210 if (n == 0)
1211 return PyInt_FromLong((long) (m-i));
1212
1213 r = 0;
1214 while (i < m) {
1215 if (!memcmp(s+i, sub, n)) {
1216 r++;
1217 i += n;
1218 } else {
1219 i++;
1220 }
1221 }
1222 return PyInt_FromLong((long) r);
1223}
1224
1225
1226static char swapcase__doc__[] =
1227"S.swapcase() -> string\n\
1228\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001229Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230converted to lowercase and vice versa.";
1231
1232static PyObject *
1233string_swapcase(self, args)
1234 PyStringObject *self;
1235 PyObject *args;
1236{
1237 char *s = PyString_AS_STRING(self), *s_new;
1238 int i, n = PyString_GET_SIZE(self);
1239 PyObject *new;
1240
Guido van Rossum43713e52000-02-29 13:59:29 +00001241 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242 return NULL;
1243 new = PyString_FromStringAndSize(NULL, n);
1244 if (new == NULL)
1245 return NULL;
1246 s_new = PyString_AsString(new);
1247 for (i = 0; i < n; i++) {
1248 int c = Py_CHARMASK(*s++);
1249 if (islower(c)) {
1250 *s_new = toupper(c);
1251 }
1252 else if (isupper(c)) {
1253 *s_new = tolower(c);
1254 }
1255 else
1256 *s_new = c;
1257 s_new++;
1258 }
1259 return new;
1260}
1261
1262
1263static char translate__doc__[] =
1264"S.translate(table [,deletechars]) -> string\n\
1265\n\
1266Return a copy of the string S, where all characters occurring\n\
1267in the optional argument deletechars are removed, and the\n\
1268remaining characters have been mapped through the given\n\
1269translation table, which must be a string of length 256.";
1270
1271static PyObject *
1272string_translate(self, args)
1273 PyStringObject *self;
1274 PyObject *args;
1275{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001276 register char *input, *output;
1277 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278 register int i, c, changed = 0;
1279 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001280 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 int inlen, tablen, dellen = 0;
1282 PyObject *result;
1283 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001284 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001285
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286 if (!PyArg_ParseTuple(args, "O|O:translate",
1287 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001289
1290 if (PyString_Check(tableobj)) {
1291 table1 = PyString_AS_STRING(tableobj);
1292 tablen = PyString_GET_SIZE(tableobj);
1293 }
1294 else if (PyUnicode_Check(tableobj)) {
1295 /* Unicode .translate() does not support the deletechars
1296 parameter; instead a mapping to None will cause characters
1297 to be deleted. */
1298 if (delobj != NULL) {
1299 PyErr_SetString(PyExc_TypeError,
1300 "deletions are implemented differently for unicode");
1301 return NULL;
1302 }
1303 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1304 }
1305 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307
1308 if (delobj != NULL) {
1309 if (PyString_Check(delobj)) {
1310 del_table = PyString_AS_STRING(delobj);
1311 dellen = PyString_GET_SIZE(delobj);
1312 }
1313 else if (PyUnicode_Check(delobj)) {
1314 PyErr_SetString(PyExc_TypeError,
1315 "deletions are implemented differently for unicode");
1316 return NULL;
1317 }
1318 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1319 return NULL;
1320
1321 if (tablen != 256) {
1322 PyErr_SetString(PyExc_ValueError,
1323 "translation table must be 256 characters long");
1324 return NULL;
1325 }
1326 }
1327 else {
1328 del_table = NULL;
1329 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 }
1331
1332 table = table1;
1333 inlen = PyString_Size(input_obj);
1334 result = PyString_FromStringAndSize((char *)NULL, inlen);
1335 if (result == NULL)
1336 return NULL;
1337 output_start = output = PyString_AsString(result);
1338 input = PyString_AsString(input_obj);
1339
1340 if (dellen == 0) {
1341 /* If no deletions are required, use faster code */
1342 for (i = inlen; --i >= 0; ) {
1343 c = Py_CHARMASK(*input++);
1344 if (Py_CHARMASK((*output++ = table[c])) != c)
1345 changed = 1;
1346 }
1347 if (changed)
1348 return result;
1349 Py_DECREF(result);
1350 Py_INCREF(input_obj);
1351 return input_obj;
1352 }
1353
1354 for (i = 0; i < 256; i++)
1355 trans_table[i] = Py_CHARMASK(table[i]);
1356
1357 for (i = 0; i < dellen; i++)
1358 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1359
1360 for (i = inlen; --i >= 0; ) {
1361 c = Py_CHARMASK(*input++);
1362 if (trans_table[c] != -1)
1363 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1364 continue;
1365 changed = 1;
1366 }
1367 if (!changed) {
1368 Py_DECREF(result);
1369 Py_INCREF(input_obj);
1370 return input_obj;
1371 }
1372 /* Fix the size of the resulting string */
1373 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1374 return NULL;
1375 return result;
1376}
1377
1378
1379/* What follows is used for implementing replace(). Perry Stoll. */
1380
/*
  mymemfind

  memory-block counterpart of strstr.

  Scans the LEN bytes at MEM for the first place where the PAT_LEN
  bytes at PAT occur and returns that offset into MEM.  Returns -1 when
  PAT does not occur, which includes the case where PAT is longer than
  MEM.
*/
static int
mymemfind(char *mem, int len, char *pat, int pat_len)
{
    /* a match cannot start inside the last pat_len-1 bytes */
    int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
1410
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match:
      mem=1111  and pat=11 gives 2,
      mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(char *mem, int len, char *pat, int pat_len)
{
    int hits = 0;

    for (;;) {
        int at;

        if (len < 0)
            break;
        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* step over the text before the match and the match itself */
        mem += at + pat_len;
        len -= at + pat_len;
        hits++;
    }
    return hits;
}
1438
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  COUNT limits the number of replacements; a
   negative COUNT means "replace all".

   If STR is empty, if PAT is longer than STR, or if no replacement is
   to be made, the original string STR is returned.  Otherwise a new
   buffer is allocated here with PyMem_MALLOC and returned; the caller
   must release it with PyMem_FREE.

   PAT_LEN must be greater than zero (string_replace() rejects empty
   patterns before calling this function).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(char *str, int len,        /* input string */
             char *pat, int pat_len,    /* pattern string to find */
             char *sub, int sub_len,    /* substitution string */
             int count,                 /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Refuse results whose length does not fit in an int; the caller
       reports a NULL return as a memory error. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result can be empty (everything replaced by nothing).  A
       zero-byte allocation may return NULL on some platforms, which
       would be mistaken for failure, so always ask for at least one
       byte. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return str;
}
1521
1522
1523static char replace__doc__[] =
1524"S.replace (old, new[, maxsplit]) -> string\n\
1525\n\
1526Return a copy of string S with all occurrences of substring\n\
1527old replaced by new. If the optional argument maxsplit is\n\
1528given, only the first maxsplit occurrences are replaced.";
1529
1530static PyObject *
1531string_replace(self, args)
1532 PyStringObject *self;
1533 PyObject *args;
1534{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535 const char *str = PyString_AS_STRING(self), *sub, *repl;
1536 char *new_s;
1537 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1538 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001539 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001540 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001541
Guido van Rossum4c08d552000-03-10 22:55:18 +00001542 if (!PyArg_ParseTuple(args, "OO|i:replace",
1543 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001544 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001545
1546 if (PyString_Check(subobj)) {
1547 sub = PyString_AS_STRING(subobj);
1548 sub_len = PyString_GET_SIZE(subobj);
1549 }
1550 else if (PyUnicode_Check(subobj))
1551 return PyUnicode_Replace((PyObject *)self,
1552 subobj, replobj, count);
1553 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1554 return NULL;
1555
1556 if (PyString_Check(replobj)) {
1557 repl = PyString_AS_STRING(replobj);
1558 repl_len = PyString_GET_SIZE(replobj);
1559 }
1560 else if (PyUnicode_Check(replobj))
1561 return PyUnicode_Replace((PyObject *)self,
1562 subobj, replobj, count);
1563 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1564 return NULL;
1565
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001566 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001567 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568 return NULL;
1569 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001570 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 if (new_s == NULL) {
1572 PyErr_NoMemory();
1573 return NULL;
1574 }
1575 if (out_len == -1) {
1576 /* we're returning another reference to self */
1577 new = (PyObject*)self;
1578 Py_INCREF(new);
1579 }
1580 else {
1581 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001582 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583 }
1584 return new;
1585}
1586
1587
1588static char startswith__doc__[] =
1589"S.startswith(prefix[, start[, end]]) -> int\n\
1590\n\
1591Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1592optional start, test S beginning at that position. With optional end, stop\n\
1593comparing S at that position.";
1594
1595static PyObject *
1596string_startswith(self, args)
1597 PyStringObject *self;
1598 PyObject *args;
1599{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001600 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001602 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603 int plen;
1604 int start = 0;
1605 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607
Guido van Rossumc6821402000-05-08 14:08:05 +00001608 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1609 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001610 return NULL;
1611 if (PyString_Check(subobj)) {
1612 prefix = PyString_AS_STRING(subobj);
1613 plen = PyString_GET_SIZE(subobj);
1614 }
1615 else if (PyUnicode_Check(subobj))
1616 return PyInt_FromLong(
1617 PyUnicode_Tailmatch((PyObject *)self,
1618 subobj, start, end, -1));
1619 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620 return NULL;
1621
1622 /* adopt Java semantics for index out of range. it is legal for
1623 * offset to be == plen, but this only returns true if prefix is
1624 * the empty string.
1625 */
1626 if (start < 0 || start+plen > len)
1627 return PyInt_FromLong(0);
1628
1629 if (!memcmp(str+start, prefix, plen)) {
1630 /* did the match end after the specified end? */
1631 if (end < 0)
1632 return PyInt_FromLong(1);
1633 else if (end - start < plen)
1634 return PyInt_FromLong(0);
1635 else
1636 return PyInt_FromLong(1);
1637 }
1638 else return PyInt_FromLong(0);
1639}
1640
1641
1642static char endswith__doc__[] =
1643"S.endswith(suffix[, start[, end]]) -> int\n\
1644\n\
1645Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1646optional start, test S beginning at that position. With optional end, stop\n\
1647comparing S at that position.";
1648
1649static PyObject *
1650string_endswith(self, args)
1651 PyStringObject *self;
1652 PyObject *args;
1653{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001654 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001656 const char* suffix;
1657 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 int start = 0;
1659 int end = -1;
1660 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001661 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662
Guido van Rossumc6821402000-05-08 14:08:05 +00001663 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1664 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665 return NULL;
1666 if (PyString_Check(subobj)) {
1667 suffix = PyString_AS_STRING(subobj);
1668 slen = PyString_GET_SIZE(subobj);
1669 }
1670 else if (PyUnicode_Check(subobj))
1671 return PyInt_FromLong(
1672 PyUnicode_Tailmatch((PyObject *)self,
1673 subobj, start, end, +1));
1674 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675 return NULL;
1676
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678 return PyInt_FromLong(0);
1679
1680 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684 return PyInt_FromLong(1);
1685 else return PyInt_FromLong(0);
1686}
1687
1688
Guido van Rossum4c08d552000-03-10 22:55:18 +00001689static char expandtabs__doc__[] =
1690"S.expandtabs([tabsize]) -> string\n\
1691\n\
1692Return a copy of S where all tab characters are expanded using spaces.\n\
1693If tabsize is not given, a tab size of 8 characters is assumed.";
1694
1695static PyObject*
1696string_expandtabs(PyStringObject *self, PyObject *args)
1697{
1698 const char *e, *p;
1699 char *q;
1700 int i, j;
1701 PyObject *u;
1702 int tabsize = 8;
1703
1704 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1705 return NULL;
1706
1707 /* First pass: determine size of ouput string */
1708 i = j = 0;
1709 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1710 for (p = PyString_AS_STRING(self); p < e; p++)
1711 if (*p == '\t') {
1712 if (tabsize > 0)
1713 j += tabsize - (j % tabsize);
1714 }
1715 else {
1716 j++;
1717 if (*p == '\n' || *p == '\r') {
1718 i += j;
1719 j = 0;
1720 }
1721 }
1722
1723 /* Second pass: create output string and fill it */
1724 u = PyString_FromStringAndSize(NULL, i + j);
1725 if (!u)
1726 return NULL;
1727
1728 j = 0;
1729 q = PyString_AS_STRING(u);
1730
1731 for (p = PyString_AS_STRING(self); p < e; p++)
1732 if (*p == '\t') {
1733 if (tabsize > 0) {
1734 i = tabsize - (j % tabsize);
1735 j += i;
1736 while (i--)
1737 *q++ = ' ';
1738 }
1739 }
1740 else {
1741 j++;
1742 *q++ = *p;
1743 if (*p == '\n' || *p == '\r')
1744 j = 0;
1745 }
1746
1747 return u;
1748}
1749
1750static
1751PyObject *pad(PyStringObject *self,
1752 int left,
1753 int right,
1754 char fill)
1755{
1756 PyObject *u;
1757
1758 if (left < 0)
1759 left = 0;
1760 if (right < 0)
1761 right = 0;
1762
1763 if (left == 0 && right == 0) {
1764 Py_INCREF(self);
1765 return (PyObject *)self;
1766 }
1767
1768 u = PyString_FromStringAndSize(NULL,
1769 left + PyString_GET_SIZE(self) + right);
1770 if (u) {
1771 if (left)
1772 memset(PyString_AS_STRING(u), fill, left);
1773 memcpy(PyString_AS_STRING(u) + left,
1774 PyString_AS_STRING(self),
1775 PyString_GET_SIZE(self));
1776 if (right)
1777 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1778 fill, right);
1779 }
1780
1781 return u;
1782}
1783
1784static char ljust__doc__[] =
1785"S.ljust(width) -> string\n\
1786\n\
1787Return S left justified in a string of length width. Padding is\n\
1788done using spaces.";
1789
1790static PyObject *
1791string_ljust(PyStringObject *self, PyObject *args)
1792{
1793 int width;
1794 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1795 return NULL;
1796
1797 if (PyString_GET_SIZE(self) >= width) {
1798 Py_INCREF(self);
1799 return (PyObject*) self;
1800 }
1801
1802 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1803}
1804
1805
1806static char rjust__doc__[] =
1807"S.rjust(width) -> string\n\
1808\n\
1809Return S right justified in a string of length width. Padding is\n\
1810done using spaces.";
1811
1812static PyObject *
1813string_rjust(PyStringObject *self, PyObject *args)
1814{
1815 int width;
1816 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1817 return NULL;
1818
1819 if (PyString_GET_SIZE(self) >= width) {
1820 Py_INCREF(self);
1821 return (PyObject*) self;
1822 }
1823
1824 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1825}
1826
1827
1828static char center__doc__[] =
1829"S.center(width) -> string\n\
1830\n\
1831Return S centered in a string of length width. Padding is done\n\
1832using spaces.";
1833
1834static PyObject *
1835string_center(PyStringObject *self, PyObject *args)
1836{
1837 int marg, left;
1838 int width;
1839
1840 if (!PyArg_ParseTuple(args, "i:center", &width))
1841 return NULL;
1842
1843 if (PyString_GET_SIZE(self) >= width) {
1844 Py_INCREF(self);
1845 return (PyObject*) self;
1846 }
1847
1848 marg = width - PyString_GET_SIZE(self);
1849 left = marg / 2 + (marg & width & 1);
1850
1851 return pad(self, left, marg - left, ' ');
1852}
1853
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad with '0' characters up to `width`.  If the
   original text starts with a sign, the sign is moved in front of the
   inserted zeros.  (Currently compiled out.) */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int nzeros;
    PyObject *padded;
    char *text;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    nzeros = width - PyString_GET_SIZE(self);

    padded = pad(self, nzeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* text[nzeros] is the first character of the original string */
    text = PyString_AS_STRING(padded);
    switch (text[nzeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        text[0] = text[nzeros];
        text[nzeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1893
1894static char isspace__doc__[] =
1895"S.isspace() -> int\n\
1896\n\
1897Return 1 if there are only whitespace characters in S,\n\
18980 otherwise.";
1899
1900static PyObject*
1901string_isspace(PyStringObject *self, PyObject *args)
1902{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001903 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1904 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001905
1906 if (!PyArg_NoArgs(args))
1907 return NULL;
1908
1909 /* Shortcut for single character strings */
1910 if (PyString_GET_SIZE(self) == 1 &&
1911 isspace(*p))
1912 return PyInt_FromLong(1);
1913
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001914 /* Special case for empty strings */
1915 if (PyString_GET_SIZE(self) == 0)
1916 return PyInt_FromLong(0);
1917
Guido van Rossum4c08d552000-03-10 22:55:18 +00001918 e = p + PyString_GET_SIZE(self);
1919 for (; p < e; p++) {
1920 if (!isspace(*p))
1921 return PyInt_FromLong(0);
1922 }
1923 return PyInt_FromLong(1);
1924}
1925
1926
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00001927static char isalpha__doc__[] =
1928"S.isalpha() -> int\n\
1929\n\
1930Return 1 if all characters in S are alphabetic\n\
1931and there is at least one character in S, 0 otherwise.";
1932
1933static PyObject*
1934string_isalpha(PyUnicodeObject *self, PyObject *args)
1935{
1936 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1937 register const unsigned char *e;
1938
1939 if (!PyArg_NoArgs(args))
1940 return NULL;
1941
1942 /* Shortcut for single character strings */
1943 if (PyString_GET_SIZE(self) == 1 &&
1944 isalpha(*p))
1945 return PyInt_FromLong(1);
1946
1947 /* Special case for empty strings */
1948 if (PyString_GET_SIZE(self) == 0)
1949 return PyInt_FromLong(0);
1950
1951 e = p + PyString_GET_SIZE(self);
1952 for (; p < e; p++) {
1953 if (!isalpha(*p))
1954 return PyInt_FromLong(0);
1955 }
1956 return PyInt_FromLong(1);
1957}
1958
1959
1960static char isalnum__doc__[] =
1961"S.isalnum() -> int\n\
1962\n\
1963Return 1 if all characters in S are alphanumeric\n\
1964and there is at least one character in S, 0 otherwise.";
1965
1966static PyObject*
1967string_isalnum(PyUnicodeObject *self, PyObject *args)
1968{
1969 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1970 register const unsigned char *e;
1971
1972 if (!PyArg_NoArgs(args))
1973 return NULL;
1974
1975 /* Shortcut for single character strings */
1976 if (PyString_GET_SIZE(self) == 1 &&
1977 isalnum(*p))
1978 return PyInt_FromLong(1);
1979
1980 /* Special case for empty strings */
1981 if (PyString_GET_SIZE(self) == 0)
1982 return PyInt_FromLong(0);
1983
1984 e = p + PyString_GET_SIZE(self);
1985 for (; p < e; p++) {
1986 if (!isalnum(*p))
1987 return PyInt_FromLong(0);
1988 }
1989 return PyInt_FromLong(1);
1990}
1991
1992
Guido van Rossum4c08d552000-03-10 22:55:18 +00001993static char isdigit__doc__[] =
1994"S.isdigit() -> int\n\
1995\n\
1996Return 1 if there are only digit characters in S,\n\
19970 otherwise.";
1998
1999static PyObject*
2000string_isdigit(PyStringObject *self, PyObject *args)
2001{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002002 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2003 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002004
2005 if (!PyArg_NoArgs(args))
2006 return NULL;
2007
2008 /* Shortcut for single character strings */
2009 if (PyString_GET_SIZE(self) == 1 &&
2010 isdigit(*p))
2011 return PyInt_FromLong(1);
2012
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002013 /* Special case for empty strings */
2014 if (PyString_GET_SIZE(self) == 0)
2015 return PyInt_FromLong(0);
2016
Guido van Rossum4c08d552000-03-10 22:55:18 +00002017 e = p + PyString_GET_SIZE(self);
2018 for (; p < e; p++) {
2019 if (!isdigit(*p))
2020 return PyInt_FromLong(0);
2021 }
2022 return PyInt_FromLong(1);
2023}
2024
2025
2026static char islower__doc__[] =
2027"S.islower() -> int\n\
2028\n\
2029Return 1 if all cased characters in S are lowercase and there is\n\
2030at least one cased character in S, 0 otherwise.";
2031
2032static PyObject*
2033string_islower(PyStringObject *self, PyObject *args)
2034{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002035 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2036 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002037 int cased;
2038
2039 if (!PyArg_NoArgs(args))
2040 return NULL;
2041
2042 /* Shortcut for single character strings */
2043 if (PyString_GET_SIZE(self) == 1)
2044 return PyInt_FromLong(islower(*p) != 0);
2045
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002046 /* Special case for empty strings */
2047 if (PyString_GET_SIZE(self) == 0)
2048 return PyInt_FromLong(0);
2049
Guido van Rossum4c08d552000-03-10 22:55:18 +00002050 e = p + PyString_GET_SIZE(self);
2051 cased = 0;
2052 for (; p < e; p++) {
2053 if (isupper(*p))
2054 return PyInt_FromLong(0);
2055 else if (!cased && islower(*p))
2056 cased = 1;
2057 }
2058 return PyInt_FromLong(cased);
2059}
2060
2061
2062static char isupper__doc__[] =
2063"S.isupper() -> int\n\
2064\n\
2065Return 1 if all cased characters in S are uppercase and there is\n\
2066at least one cased character in S, 0 otherwise.";
2067
2068static PyObject*
2069string_isupper(PyStringObject *self, PyObject *args)
2070{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002071 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2072 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002073 int cased;
2074
2075 if (!PyArg_NoArgs(args))
2076 return NULL;
2077
2078 /* Shortcut for single character strings */
2079 if (PyString_GET_SIZE(self) == 1)
2080 return PyInt_FromLong(isupper(*p) != 0);
2081
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002082 /* Special case for empty strings */
2083 if (PyString_GET_SIZE(self) == 0)
2084 return PyInt_FromLong(0);
2085
Guido van Rossum4c08d552000-03-10 22:55:18 +00002086 e = p + PyString_GET_SIZE(self);
2087 cased = 0;
2088 for (; p < e; p++) {
2089 if (islower(*p))
2090 return PyInt_FromLong(0);
2091 else if (!cased && isupper(*p))
2092 cased = 1;
2093 }
2094 return PyInt_FromLong(cased);
2095}
2096
2097
2098static char istitle__doc__[] =
2099"S.istitle() -> int\n\
2100\n\
2101Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2102may only follow uncased characters and lowercase characters only cased\n\
2103ones. Return 0 otherwise.";
2104
2105static PyObject*
2106string_istitle(PyStringObject *self, PyObject *args)
2107{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002108 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2109 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002110 int cased, previous_is_cased;
2111
2112 if (!PyArg_NoArgs(args))
2113 return NULL;
2114
2115 /* Shortcut for single character strings */
2116 if (PyString_GET_SIZE(self) == 1)
2117 return PyInt_FromLong(isupper(*p) != 0);
2118
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002119 /* Special case for empty strings */
2120 if (PyString_GET_SIZE(self) == 0)
2121 return PyInt_FromLong(0);
2122
Guido van Rossum4c08d552000-03-10 22:55:18 +00002123 e = p + PyString_GET_SIZE(self);
2124 cased = 0;
2125 previous_is_cased = 0;
2126 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002127 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002128
2129 if (isupper(ch)) {
2130 if (previous_is_cased)
2131 return PyInt_FromLong(0);
2132 previous_is_cased = 1;
2133 cased = 1;
2134 }
2135 else if (islower(ch)) {
2136 if (!previous_is_cased)
2137 return PyInt_FromLong(0);
2138 previous_is_cased = 1;
2139 cased = 1;
2140 }
2141 else
2142 previous_is_cased = 0;
2143 }
2144 return PyInt_FromLong(cased);
2145}
2146
2147
2148static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002149"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150\n\
2151Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002152Line breaks are not included in the resulting list unless keepends\n\
2153is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002154
2155#define SPLIT_APPEND(data, left, right) \
2156 str = PyString_FromStringAndSize(data + left, right - left); \
2157 if (!str) \
2158 goto onError; \
2159 if (PyList_Append(list, str)) { \
2160 Py_DECREF(str); \
2161 goto onError; \
2162 } \
2163 else \
2164 Py_DECREF(str);
2165
2166static PyObject*
2167string_splitlines(PyStringObject *self, PyObject *args)
2168{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169 register int i;
2170 register int j;
2171 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002172 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002173 PyObject *list;
2174 PyObject *str;
2175 char *data;
2176
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002177 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002178 return NULL;
2179
2180 data = PyString_AS_STRING(self);
2181 len = PyString_GET_SIZE(self);
2182
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183 list = PyList_New(0);
2184 if (!list)
2185 goto onError;
2186
2187 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002188 int eol;
2189
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190 /* Find a line and append it */
2191 while (i < len && data[i] != '\n' && data[i] != '\r')
2192 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193
2194 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002195 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196 if (i < len) {
2197 if (data[i] == '\r' && i + 1 < len &&
2198 data[i+1] == '\n')
2199 i += 2;
2200 else
2201 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002202 if (keepends)
2203 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002205 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206 j = i;
2207 }
2208 if (j < len) {
2209 SPLIT_APPEND(data, j, len);
2210 }
2211
2212 return list;
2213
2214 onError:
2215 Py_DECREF(list);
2216 return NULL;
2217}
2218
2219#undef SPLIT_APPEND
2220
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221
/* Method table for string objects; string_getattr() hands it to
   Py_FindMethod() to resolve attribute names.

   Each entry holds: method name, implementing C function, a flags
   value and the docstring.  In the implementations visible in this
   part of the file, entries flagged 1 parse their arguments with
   PyArg_ParseTuple() while entries flagged 0 (the is*() predicates)
   check them with PyArg_NoArgs().
   NOTE(review): presumably 1 is METH_VARARGS -- confirm against
   methodobject.h. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, 1, join__doc__},
    {"split", (PyCFunction)string_split, 1, split__doc__},
    {"lower", (PyCFunction)string_lower, 1, lower__doc__},
    {"upper", (PyCFunction)string_upper, 1, upper__doc__},
    {"islower", (PyCFunction)string_islower, 0, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
    {"count", (PyCFunction)string_count, 1, count__doc__},
    {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
    {"find", (PyCFunction)string_find, 1, find__doc__},
    {"index", (PyCFunction)string_index, 1, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, 1, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
    {"strip", (PyCFunction)string_strip, 1, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
    {"translate", (PyCFunction)string_translate, 1, translate__doc__},
    {"title", (PyCFunction)string_title, 1, title__doc__},
    {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
    {"center", (PyCFunction)string_center, 1, center__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
    /* zfill stays disabled together with its implementation */
#if 0
    {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
#endif
    {NULL, NULL} /* sentinel */
};
2262
2263static PyObject *
2264string_getattr(s, name)
2265 PyStringObject *s;
2266 char *name;
2267{
2268 return Py_FindMethod(string_methods, (PyObject*)s, name);
2269}
2270
2271
/* Type object for strings.  It wires up the handlers defined in this
   file: deallocation, printing, attribute lookup (string_getattr),
   comparison, repr, hashing, and the sequence and buffer method
   tables.
   NOTE(review): the four unlabelled leading fields are presumably
   ob_size, tp_name, tp_basicsize and tp_itemsize -- confirm against
   the PyTypeObject declaration in object.h. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,
    "string",
    sizeof(PyStringObject),
    sizeof(char),
    (destructor)string_dealloc, /*tp_dealloc*/
    (printfunc)string_print, /*tp_print*/
    (getattrfunc)string_getattr, /*tp_getattr*/
    0, /*tp_setattr*/
    (cmpfunc)string_compare, /*tp_compare*/
    (reprfunc)string_repr, /*tp_repr*/
    0, /*tp_as_number*/
    &string_as_sequence, /*tp_as_sequence*/
    0, /*tp_as_mapping*/
    (hashfunc)string_hash, /*tp_hash*/
    0, /*tp_call*/
    0, /*tp_str*/
    0, /*tp_getattro*/
    0, /*tp_setattro*/
    &string_as_buffer, /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT, /*tp_flags*/
    0, /*tp_doc*/
};
2296
2297void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002298PyString_Concat(pv, w)
2299 register PyObject **pv;
2300 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002301{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002302 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002303 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002304 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002305 if (w == NULL || !PyString_Check(*pv)) {
2306 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002307 *pv = NULL;
2308 return;
2309 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002310 v = string_concat((PyStringObject *) *pv, w);
2311 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002312 *pv = v;
2313}
2314
Guido van Rossum013142a1994-08-30 08:19:36 +00002315void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002316PyString_ConcatAndDel(pv, w)
2317 register PyObject **pv;
2318 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002319{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002320 PyString_Concat(pv, w);
2321 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002322}
2323
2324
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002325/* The following function breaks the notion that strings are immutable:
2326 it changes the size of a string. We get away with this only if there
2327 is only one module referencing the object. You can also think of it
2328 as creating a new string object and destroying the old one, only
2329 more efficiently. In any case, don't use this if the string may
2330 already be known to some other part of the code... */
2331
2332int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002333_PyString_Resize(pv, newsize)
2334 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002335 int newsize;
2336{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002337 register PyObject *v;
2338 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002339 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002340 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002341 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002342 Py_DECREF(v);
2343 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002344 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002345 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002346 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002347#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002348 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002349#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002350 _Py_ForgetReference(v);
2351 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002352 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002353 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002354 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002355 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002356 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002357 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002358 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002359 _Py_NewReference(*pv);
2360 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002361 sv->ob_size = newsize;
2362 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002363 return 0;
2364}
Guido van Rossume5372401993-03-16 12:15:04 +00002365
2366/* Helpers for formatstring */
2367
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002368static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002369getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002370 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002371 int arglen;
2372 int *p_argidx;
2373{
2374 int argidx = *p_argidx;
2375 if (argidx < arglen) {
2376 (*p_argidx)++;
2377 if (arglen < 0)
2378 return args;
2379 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002380 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002381 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002382 PyErr_SetString(PyExc_TypeError,
2383 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002384 return NULL;
2385}
2386
/* Bit flags recorded by PyString_Format() for the flag characters of
   a format specifier. */
#define F_LJUST (1<<0)	/* '-' */
#define F_SIGN (1<<1)	/* '+' */
#define F_BLANK (1<<2)	/* ' ' */
#define F_ALT (1<<3)	/* '#' */
#define F_ZERO (1<<4)	/* '0' */
2392
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002393static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002394formatfloat(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002395 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002396 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002397 int flags;
2398 int prec;
2399 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002400 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002401{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002402 /* fmt = '%#.' + `prec` + `type`
2403 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002404 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002405 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002406 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002407 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002408 if (prec < 0)
2409 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002410 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2411 type = 'g';
2412 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002413 /* worst case length calc to ensure no buffer overrun:
2414 fmt = %#.<prec>g
2415 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2416 for any double rep.)
2417 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2418 If prec=0 the effective precision is 1 (the leading digit is
2419 always given), therefore increase by one to 10+prec. */
2420 if (buflen <= (size_t)10 + (size_t)prec) {
2421 PyErr_SetString(PyExc_OverflowError,
2422 "formatted float is too long (precision too long?)");
2423 return -1;
2424 }
Guido van Rossume5372401993-03-16 12:15:04 +00002425 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002426 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002427}
2428
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002429static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002430formatint(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002431 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002432 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002433 int flags;
2434 int prec;
2435 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002436 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002437{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002438 /* fmt = '%#.' + `prec` + 'l' + `type`
2439 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002440 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002441 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002442 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002443 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002444 if (prec < 0)
2445 prec = 1;
2446 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002447 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2448 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2449 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2450 PyErr_SetString(PyExc_OverflowError,
2451 "formatted integer is too long (precision too long?)");
2452 return -1;
2453 }
Guido van Rossume5372401993-03-16 12:15:04 +00002454 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002455 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002456}
2457
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002458static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002459formatchar(buf, buflen, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002460 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002461 size_t buflen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002462 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002463{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002464 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002465 if (PyString_Check(v)) {
2466 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002467 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002468 }
2469 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002470 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002471 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002472 }
2473 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002474 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002475}
2476
Guido van Rossum013142a1994-08-30 08:19:36 +00002477
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002478/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2479
2480 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2481 chars are formatted. XXX This is a magic number. Each formatting
2482 routine does bounds checking to ensure no overflow, but a better
2483 solution may be to malloc a buffer of appropriate size for each
2484 format. For now, the current solution is sufficient.
2485*/
2486#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00002487
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002488PyObject *
2489PyString_Format(format, args)
2490 PyObject *format;
2491 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002492{
2493 char *fmt, *res;
2494 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002495 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002496 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002497 PyObject *dict = NULL;
2498 if (format == NULL || !PyString_Check(format) || args == NULL) {
2499 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002500 return NULL;
2501 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002502 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002503 fmt = PyString_AsString(format);
2504 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002505 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002506 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002507 if (result == NULL)
2508 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002509 res = PyString_AsString(result);
2510 if (PyTuple_Check(args)) {
2511 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002512 argidx = 0;
2513 }
2514 else {
2515 arglen = -1;
2516 argidx = -2;
2517 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002518 if (args->ob_type->tp_as_mapping)
2519 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002520 while (--fmtcnt >= 0) {
2521 if (*fmt != '%') {
2522 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002523 rescnt = fmtcnt + 100;
2524 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002525 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002526 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002527 res = PyString_AsString(result)
2528 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002529 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002530 }
2531 *res++ = *fmt++;
2532 }
2533 else {
2534 /* Got a format specifier */
2535 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002536 int width = -1;
2537 int prec = -1;
2538 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002539 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002540 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002541 PyObject *v = NULL;
2542 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002543 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002544 int sign;
2545 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002546 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002547 char *fmt_start = fmt;
2548
Guido van Rossumda9c2711996-12-05 21:58:58 +00002549 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002550 if (*fmt == '(') {
2551 char *keystart;
2552 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002553 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002554 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002555
2556 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002557 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002558 "format requires a mapping");
2559 goto error;
2560 }
2561 ++fmt;
2562 --fmtcnt;
2563 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002564 /* Skip over balanced parentheses */
2565 while (pcount > 0 && --fmtcnt >= 0) {
2566 if (*fmt == ')')
2567 --pcount;
2568 else if (*fmt == '(')
2569 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002570 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002571 }
2572 keylen = fmt - keystart - 1;
2573 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002574 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002575 "incomplete format key");
2576 goto error;
2577 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002578 key = PyString_FromStringAndSize(keystart,
2579 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002580 if (key == NULL)
2581 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002582 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002583 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002584 args_owned = 0;
2585 }
2586 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002587 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002588 if (args == NULL) {
2589 goto error;
2590 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002591 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002592 arglen = -1;
2593 argidx = -2;
2594 }
Guido van Rossume5372401993-03-16 12:15:04 +00002595 while (--fmtcnt >= 0) {
2596 switch (c = *fmt++) {
2597 case '-': flags |= F_LJUST; continue;
2598 case '+': flags |= F_SIGN; continue;
2599 case ' ': flags |= F_BLANK; continue;
2600 case '#': flags |= F_ALT; continue;
2601 case '0': flags |= F_ZERO; continue;
2602 }
2603 break;
2604 }
2605 if (c == '*') {
2606 v = getnextarg(args, arglen, &argidx);
2607 if (v == NULL)
2608 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002609 if (!PyInt_Check(v)) {
2610 PyErr_SetString(PyExc_TypeError,
2611 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002612 goto error;
2613 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002614 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002615 if (width < 0) {
2616 flags |= F_LJUST;
2617 width = -width;
2618 }
Guido van Rossume5372401993-03-16 12:15:04 +00002619 if (--fmtcnt >= 0)
2620 c = *fmt++;
2621 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002622 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002623 width = c - '0';
2624 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002625 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002626 if (!isdigit(c))
2627 break;
2628 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002629 PyErr_SetString(
2630 PyExc_ValueError,
2631 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002632 goto error;
2633 }
2634 width = width*10 + (c - '0');
2635 }
2636 }
2637 if (c == '.') {
2638 prec = 0;
2639 if (--fmtcnt >= 0)
2640 c = *fmt++;
2641 if (c == '*') {
2642 v = getnextarg(args, arglen, &argidx);
2643 if (v == NULL)
2644 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002645 if (!PyInt_Check(v)) {
2646 PyErr_SetString(
2647 PyExc_TypeError,
2648 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002649 goto error;
2650 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002651 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002652 if (prec < 0)
2653 prec = 0;
2654 if (--fmtcnt >= 0)
2655 c = *fmt++;
2656 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002657 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002658 prec = c - '0';
2659 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002660 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002661 if (!isdigit(c))
2662 break;
2663 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002664 PyErr_SetString(
2665 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002666 "prec too big");
2667 goto error;
2668 }
2669 prec = prec*10 + (c - '0');
2670 }
2671 }
2672 } /* prec */
2673 if (fmtcnt >= 0) {
2674 if (c == 'h' || c == 'l' || c == 'L') {
2675 size = c;
2676 if (--fmtcnt >= 0)
2677 c = *fmt++;
2678 }
2679 }
2680 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002681 PyErr_SetString(PyExc_ValueError,
2682 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002683 goto error;
2684 }
2685 if (c != '%') {
2686 v = getnextarg(args, arglen, &argidx);
2687 if (v == NULL)
2688 goto error;
2689 }
2690 sign = 0;
2691 fill = ' ';
2692 switch (c) {
2693 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002694 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002695 len = 1;
2696 break;
2697 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002698 case 'r':
2699 if (PyUnicode_Check(v)) {
2700 fmt = fmt_start;
2701 goto unicode;
2702 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002703 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002704 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002705 else
2706 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002707 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002708 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002709 if (!PyString_Check(temp)) {
2710 PyErr_SetString(PyExc_TypeError,
2711 "%s argument has non-string str()");
2712 goto error;
2713 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002714 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002715 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002716 if (prec >= 0 && len > prec)
2717 len = prec;
2718 break;
2719 case 'i':
2720 case 'd':
2721 case 'u':
2722 case 'o':
2723 case 'x':
2724 case 'X':
2725 if (c == 'i')
2726 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002727 pbuf = formatbuf;
2728 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002729 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002730 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002731 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002732 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002733 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002734 if ((flags&F_ALT) &&
2735 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002736 pbuf[0] == '0' && pbuf[1] == c) {
2737 *res++ = *pbuf++;
2738 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002739 rescnt -= 2;
2740 len -= 2;
2741 width -= 2;
2742 if (width < 0)
2743 width = 0;
2744 }
2745 }
Guido van Rossume5372401993-03-16 12:15:04 +00002746 break;
2747 case 'e':
2748 case 'E':
2749 case 'f':
2750 case 'g':
2751 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002752 pbuf = formatbuf;
2753 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002754 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002755 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002756 sign = 1;
2757 if (flags&F_ZERO)
2758 fill = '0';
2759 break;
2760 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002761 pbuf = formatbuf;
2762 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002763 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002764 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002765 break;
2766 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002767 PyErr_Format(PyExc_ValueError,
2768 "unsupported format character '%c' (0x%x)",
2769 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002770 goto error;
2771 }
2772 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002773 if (*pbuf == '-' || *pbuf == '+') {
2774 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002775 len--;
2776 }
2777 else if (flags & F_SIGN)
2778 sign = '+';
2779 else if (flags & F_BLANK)
2780 sign = ' ';
2781 else
2782 sign = '\0';
2783 }
2784 if (width < len)
2785 width = len;
2786 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002787 reslen -= rescnt;
2788 rescnt = width + fmtcnt + 100;
2789 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002790 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002791 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002792 res = PyString_AsString(result)
2793 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002794 }
2795 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002796 if (fill != ' ')
2797 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002798 rescnt--;
2799 if (width > len)
2800 width--;
2801 }
2802 if (width > len && !(flags&F_LJUST)) {
2803 do {
2804 --rescnt;
2805 *res++ = fill;
2806 } while (--width > len);
2807 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002808 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002809 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002810 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002811 res += len;
2812 rescnt -= len;
2813 while (--width >= len) {
2814 --rescnt;
2815 *res++ = ' ';
2816 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002817 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002818 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002819 "not all arguments converted");
2820 goto error;
2821 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002822 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002823 } /* '%' */
2824 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002825 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002826 PyErr_SetString(PyExc_TypeError,
2827 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002828 goto error;
2829 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002830 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002831 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002832 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002833 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002834 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002835
2836 unicode:
2837 if (args_owned) {
2838 Py_DECREF(args);
2839 args_owned = 0;
2840 }
2841 /* Fiddle args right (remove the first argidx-1 arguments) */
2842 --argidx;
2843 if (PyTuple_Check(orig_args) && argidx > 0) {
2844 PyObject *v;
2845 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2846 v = PyTuple_New(n);
2847 if (v == NULL)
2848 goto error;
2849 while (--n >= 0) {
2850 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2851 Py_INCREF(w);
2852 PyTuple_SET_ITEM(v, n, w);
2853 }
2854 args = v;
2855 } else {
2856 Py_INCREF(orig_args);
2857 args = orig_args;
2858 }
2859 /* Paste rest of format string to what we have of the result
2860 string; we reuse result for this */
2861 rescnt = res - PyString_AS_STRING(result);
2862 fmtcnt = PyString_GET_SIZE(format) - \
2863 (fmt - PyString_AS_STRING(format));
2864 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2865 Py_DECREF(args);
2866 goto error;
2867 }
2868 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2869 format = result;
2870 /* Let Unicode do its magic */
2871 result = PyUnicode_Format(format, args);
2872 Py_DECREF(format);
2873 Py_DECREF(args);
2874 return result;
2875
Guido van Rossume5372401993-03-16 12:15:04 +00002876 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002877 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002878 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002879 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002880 }
Guido van Rossume5372401993-03-16 12:15:04 +00002881 return NULL;
2882}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002883
2884
2885#ifdef INTERN_STRINGS
2886
2887static PyObject *interned;
2888
2889void
2890PyString_InternInPlace(p)
2891 PyObject **p;
2892{
2893 register PyStringObject *s = (PyStringObject *)(*p);
2894 PyObject *t;
2895 if (s == NULL || !PyString_Check(s))
2896 Py_FatalError("PyString_InternInPlace: strings only please!");
2897 if ((t = s->ob_sinterned) != NULL) {
2898 if (t == (PyObject *)s)
2899 return;
2900 Py_INCREF(t);
2901 *p = t;
2902 Py_DECREF(s);
2903 return;
2904 }
2905 if (interned == NULL) {
2906 interned = PyDict_New();
2907 if (interned == NULL)
2908 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002909 }
2910 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2911 Py_INCREF(t);
2912 *p = s->ob_sinterned = t;
2913 Py_DECREF(s);
2914 return;
2915 }
2916 t = (PyObject *)s;
2917 if (PyDict_SetItem(interned, t, t) == 0) {
2918 s->ob_sinterned = t;
2919 return;
2920 }
2921 PyErr_Clear();
2922}
2923
2924
2925PyObject *
2926PyString_InternFromString(cp)
2927 const char *cp;
2928{
2929 PyObject *s = PyString_FromString(cp);
2930 if (s == NULL)
2931 return NULL;
2932 PyString_InternInPlace(&s);
2933 return s;
2934}
2935
2936#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002937
2938void
2939PyString_Fini()
2940{
2941 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002942 for (i = 0; i < UCHAR_MAX + 1; i++) {
2943 Py_XDECREF(characters[i]);
2944 characters[i] = NULL;
2945 }
2946#ifndef DONT_SHARE_SHORT_STRINGS
2947 Py_XDECREF(nullstring);
2948 nullstring = NULL;
2949#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002950#ifdef INTERN_STRINGS
2951 if (interned) {
2952 int pos, changed;
2953 PyObject *key, *value;
2954 do {
2955 changed = 0;
2956 pos = 0;
2957 while (PyDict_Next(interned, &pos, &key, &value)) {
2958 if (key->ob_refcnt == 2 && key == value) {
2959 PyDict_DelItem(interned, key);
2960 changed = 1;
2961 }
2962 }
2963 } while (changed);
2964 }
2965#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002966}