blob: b547aa34bdf5e89e84797ddeb29eff24c57b8773 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00009******************************************************************/
10
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000011/* String object implementation */
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000014
Guido van Rossum71160aa1997-06-03 18:03:18 +000015#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000016#include <ctype.h>
17
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many times a shared empty string
   (null_strings) or a shared one-character string (one_strings) was
   handed out instead of allocating a new object. */
int null_strings;
int one_strings;
#endif
21
Guido van Rossum03093a21994-09-28 15:51:32 +000022#ifdef HAVE_LIMITS_H
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000023#include <limits.h>
24#else
25#ifndef UCHAR_MAX
26#define UCHAR_MAX 255
27#endif
28#endif
29
Guido van Rossumc0b618a1997-05-02 03:12:38 +000030static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000031#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000032static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000033#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000034
/*
   PyString_FromStringAndSize() and PyString_FromString() try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of
   objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000050PyObject *
51PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000052 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053 int size;
54{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000056#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000073#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000074
75 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000078 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000079 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000080 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081#ifdef CACHE_HASH
82 op->ob_shash = -1;
83#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000084#ifdef INTERN_STRINGS
85 op->ob_sinterned = NULL;
86#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000087 if (str != NULL)
88 memcpy(op->ob_sval, str, size);
89 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000090#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 if (size == 0) {
92 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
95 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000098#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100}
101
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102PyObject *
103PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000104 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000106 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000113#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000128#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000129
130 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000133 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000135 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136#ifdef CACHE_HASH
137 op->ob_shash = -1;
138#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000139#ifdef INTERN_STRINGS
140 op->ob_sinterned = NULL;
141#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000142 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000143#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 if (size == 0) {
145 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 } else if (size == 1) {
148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000151#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Guido van Rossum234f9421993-06-17 12:35:49 +0000155static void
Guido van Rossume5372401993-03-16 12:15:04 +0000156string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000157 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000158{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000159 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000160}
161
Guido van Rossumd7047b31995-01-02 19:07:15 +0000162int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163PyString_Size(op)
164 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000165{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000166 if (!PyString_Check(op)) {
167 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000168 return -1;
169 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000170 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000171}
172
173/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000174PyString_AsString(op)
175 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000176{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000177 if (!PyString_Check(op)) {
178 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000179 return NULL;
180 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000181 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000182}
183
184/* Methods */
185
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000186static int
Guido van Rossume5372401993-03-16 12:15:04 +0000187string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000188 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000189 FILE *fp;
190 int flags;
191{
192 int i;
193 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000194 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000195 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000196 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000197 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000198 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000199 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000200
201 /* figure out which quote to use; single is prefered */
202 quote = '\'';
203 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
204 quote = '"';
205
206 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000207 for (i = 0; i < op->ob_size; i++) {
208 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000209 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000210 fprintf(fp, "\\%c", c);
211 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000212 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000214 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000215 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000216 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000217 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000218}
219
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000220static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000221string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000222 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000223{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000224 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
225 PyObject *v;
226 if (newsize > INT_MAX) {
227 PyErr_SetString(PyExc_OverflowError,
228 "string is too large to make repr");
229 }
230 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000231 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000232 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000233 }
234 else {
235 register int i;
236 register char c;
237 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000238 int quote;
239
240 /* figure out which quote to use; single is prefered */
241 quote = '\'';
242 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
243 quote = '"';
244
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000245 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000246 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000247 for (i = 0; i < op->ob_size; i++) {
248 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000249 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000250 *p++ = '\\', *p++ = c;
251 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000252 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000253 while (*p != '\0')
254 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000255 }
256 else
257 *p++ = c;
258 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000259 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000261 _PyString_Resize(
262 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000263 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000265}
266
267static int
Guido van Rossume5372401993-03-16 12:15:04 +0000268string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000269 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000270{
271 return a->ob_size;
272}
273
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000274static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000275string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000276 register PyStringObject *a;
277 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278{
279 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000280 register PyStringObject *op;
281 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000282 if (PyUnicode_Check(bb))
283 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000284 PyErr_Format(PyExc_TypeError,
285 "cannot add type \"%.200s\" to string",
286 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000287 return NULL;
288 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000289#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290 /* Optimize cases with empty left or right operand */
291 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000293 return bb;
294 }
295 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000296 Py_INCREF(a);
297 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000298 }
299 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000300 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000301 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000302 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000303 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000304 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000305 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000306#ifdef CACHE_HASH
307 op->ob_shash = -1;
308#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000309#ifdef INTERN_STRINGS
310 op->ob_sinterned = NULL;
311#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000312 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
313 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
314 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000315 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000316#undef b
317}
318
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000319static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000320string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000321 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322 register int n;
323{
324 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000325 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000326 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000327 if (n < 0)
328 n = 0;
329 size = a->ob_size * n;
330 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000331 Py_INCREF(a);
332 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000334 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000335 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000336 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000337 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000338 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000339 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000340#ifdef CACHE_HASH
341 op->ob_shash = -1;
342#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000343#ifdef INTERN_STRINGS
344 op->ob_sinterned = NULL;
345#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000346 for (i = 0; i < size; i += a->ob_size)
347 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
348 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000349 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000350}
351
352/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
353
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000354static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000355string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000356 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000357 register int i, j; /* May be negative! */
358{
359 if (i < 0)
360 i = 0;
361 if (j < 0)
362 j = 0; /* Avoid signed/unsigned bug in next line */
363 if (j > a->ob_size)
364 j = a->ob_size;
365 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000366 Py_INCREF(a);
367 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000368 }
369 if (j < i)
370 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000371 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000372}
373
Guido van Rossum9284a572000-03-07 15:53:43 +0000374static int
375string_contains(a, el)
376PyObject *a, *el;
377{
378 register char *s, *end;
379 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000380 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000381 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000382 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000383 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000384 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000385 return -1;
386 }
387 c = PyString_AsString(el)[0];
388 s = PyString_AsString(a);
389 end = s + PyString_Size(a);
390 while (s < end) {
391 if (c == *s++)
392 return 1;
393 }
394 return 0;
395}
396
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000397static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000398string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000399 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000400 register int i;
401{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000402 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000403 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000404 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000405 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406 return NULL;
407 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000408 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000410#ifdef COUNT_ALLOCS
411 if (v != NULL)
412 one_strings++;
413#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000414 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000416 if (v == NULL)
417 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000418 characters[c] = (PyStringObject *) v;
419 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000420 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000422 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423}
424
425static int
Guido van Rossume5372401993-03-16 12:15:04 +0000426string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000428{
Guido van Rossum253919f1991-02-13 23:18:39 +0000429 int len_a = a->ob_size, len_b = b->ob_size;
430 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000431 int cmp;
432 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000433 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000434 if (cmp == 0)
435 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
436 if (cmp != 0)
437 return cmp;
438 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000439 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440}
441
Guido van Rossum9bfef441993-03-29 10:43:31 +0000442static long
443string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000444 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000445{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000446 register int len;
447 register unsigned char *p;
448 register long x;
449
450#ifdef CACHE_HASH
451 if (a->ob_shash != -1)
452 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000453#ifdef INTERN_STRINGS
454 if (a->ob_sinterned != NULL)
455 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000456 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000457#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000458#endif
459 len = a->ob_size;
460 p = (unsigned char *) a->ob_sval;
461 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000462 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000463 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000464 x ^= a->ob_size;
465 if (x == -1)
466 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000467#ifdef CACHE_HASH
468 a->ob_shash = x;
469#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000470 return x;
471}
472
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000473static int
474string_buffer_getreadbuf(self, index, ptr)
475 PyStringObject *self;
476 int index;
477 const void **ptr;
478{
479 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000480 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000481 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000482 return -1;
483 }
484 *ptr = (void *)self->ob_sval;
485 return self->ob_size;
486}
487
488static int
489string_buffer_getwritebuf(self, index, ptr)
490 PyStringObject *self;
491 int index;
492 const void **ptr;
493{
Guido van Rossum045e6881997-09-08 18:30:11 +0000494 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000495 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000496 return -1;
497}
498
499static int
500string_buffer_getsegcount(self, lenp)
501 PyStringObject *self;
502 int *lenp;
503{
504 if ( lenp )
505 *lenp = self->ob_size;
506 return 1;
507}
508
Guido van Rossum1db70701998-10-08 02:18:52 +0000509static int
510string_buffer_getcharbuf(self, index, ptr)
511 PyStringObject *self;
512 int index;
513 const char **ptr;
514{
515 if ( index != 0 ) {
516 PyErr_SetString(PyExc_SystemError,
517 "accessing non-existent string segment");
518 return -1;
519 }
520 *ptr = self->ob_sval;
521 return self->ob_size;
522}
523
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000524static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000525 (inquiry)string_length, /*sq_length*/
526 (binaryfunc)string_concat, /*sq_concat*/
527 (intargfunc)string_repeat, /*sq_repeat*/
528 (intargfunc)string_item, /*sq_item*/
529 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000530 0, /*sq_ass_item*/
531 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000532 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533};
534
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000535static PyBufferProcs string_as_buffer = {
536 (getreadbufferproc)string_buffer_getreadbuf,
537 (getwritebufferproc)string_buffer_getwritebuf,
538 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000539 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000540};
541
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000542
543
/* Selector values naming which end(s) of a string to strip. */
#define LEFTSTRIP	0
#define RIGHTSTRIP	1
#define BOTHSTRIP	2
547
548
549static PyObject *
550split_whitespace(s, len, maxsplit)
551 char *s;
552 int len;
553 int maxsplit;
554{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000555 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000556 PyObject* item;
557 PyObject *list = PyList_New(0);
558
559 if (list == NULL)
560 return NULL;
561
Guido van Rossum4c08d552000-03-10 22:55:18 +0000562 for (i = j = 0; i < len; ) {
563 while (i < len && isspace(Py_CHARMASK(s[i])))
564 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000565 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000566 while (i < len && !isspace(Py_CHARMASK(s[i])))
567 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000568 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000569 if (maxsplit-- <= 0)
570 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000571 item = PyString_FromStringAndSize(s+j, (int)(i-j));
572 if (item == NULL)
573 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000574 err = PyList_Append(list, item);
575 Py_DECREF(item);
576 if (err < 0)
577 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000578 while (i < len && isspace(Py_CHARMASK(s[i])))
579 i++;
580 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000581 }
582 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000583 if (j < len) {
584 item = PyString_FromStringAndSize(s+j, (int)(len - j));
585 if (item == NULL)
586 goto finally;
587 err = PyList_Append(list, item);
588 Py_DECREF(item);
589 if (err < 0)
590 goto finally;
591 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000592 return list;
593 finally:
594 Py_DECREF(list);
595 return NULL;
596}
597
598
/* Documentation string for the split() string method. */
static char split__doc__[] =
"S.split([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string. If maxsplit is given, at most maxsplit\n\
splits are done. If sep is not specified, any whitespace string\n\
is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000606
607static PyObject *
608string_split(self, args)
609 PyStringObject *self;
610 PyObject *args;
611{
612 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000613 int maxsplit = -1;
614 const char *s = PyString_AS_STRING(self), *sub;
615 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000616
Guido van Rossum4c08d552000-03-10 22:55:18 +0000617 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000618 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000619 if (maxsplit < 0)
620 maxsplit = INT_MAX;
621 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000622 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000623 if (PyString_Check(subobj)) {
624 sub = PyString_AS_STRING(subobj);
625 n = PyString_GET_SIZE(subobj);
626 }
627 else if (PyUnicode_Check(subobj))
628 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
629 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
630 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000631 if (n == 0) {
632 PyErr_SetString(PyExc_ValueError, "empty separator");
633 return NULL;
634 }
635
636 list = PyList_New(0);
637 if (list == NULL)
638 return NULL;
639
640 i = j = 0;
641 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000642 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000643 if (maxsplit-- <= 0)
644 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000645 item = PyString_FromStringAndSize(s+j, (int)(i-j));
646 if (item == NULL)
647 goto fail;
648 err = PyList_Append(list, item);
649 Py_DECREF(item);
650 if (err < 0)
651 goto fail;
652 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000653 }
654 else
655 i++;
656 }
657 item = PyString_FromStringAndSize(s+j, (int)(len-j));
658 if (item == NULL)
659 goto fail;
660 err = PyList_Append(list, item);
661 Py_DECREF(item);
662 if (err < 0)
663 goto fail;
664
665 return list;
666
667 fail:
668 Py_DECREF(list);
669 return NULL;
670}
671
672
/* Documentation string for the join() string method. */
static char join__doc__[] =
"S.join(sequence) -> string\n\
\n\
Return a string which is the concatenation of the strings in the\n\
sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000678
679static PyObject *
680string_join(self, args)
681 PyStringObject *self;
682 PyObject *args;
683{
684 char *sep = PyString_AS_STRING(self);
685 int seplen = PyString_GET_SIZE(self);
686 PyObject *res = NULL;
687 int reslen = 0;
688 char *p;
689 int seqlen = 0;
690 int sz = 100;
691 int i, slen;
692 PyObject *seq;
693
Guido van Rossum43713e52000-02-29 13:59:29 +0000694 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000695 return NULL;
696
697 seqlen = PySequence_Length(seq);
698 if (seqlen < 0 && PyErr_Occurred())
699 return NULL;
700
701 if (seqlen == 1) {
702 /* Optimization if there's only one item */
703 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000704 if (item == NULL)
705 return NULL;
706 if (!PyString_Check(item) &&
707 !PyUnicode_Check(item)) {
708 PyErr_SetString(PyExc_TypeError,
709 "first argument must be sequence of strings");
710 Py_DECREF(item);
711 return NULL;
712 }
713 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000714 }
715 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
716 return NULL;
717 p = PyString_AsString(res);
718
719 /* optimize for lists. all others (tuples and arbitrary sequences)
720 * just use the abstract interface.
721 */
722 if (PyList_Check(seq)) {
723 for (i = 0; i < seqlen; i++) {
724 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000725 if (!PyString_Check(item)){
726 if (PyUnicode_Check(item)) {
727 Py_DECREF(res);
728 return PyUnicode_Join(
729 (PyObject *)self,
730 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000731 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000732 PyErr_Format(PyExc_TypeError,
733 "sequence item %i not a string",
734 i);
735 goto finally;
736 }
737 slen = PyString_GET_SIZE(item);
738 while (reslen + slen + seplen >= sz) {
739 if (_PyString_Resize(&res, sz*2))
740 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000741 sz *= 2;
742 p = PyString_AsString(res) + reslen;
743 }
744 if (i > 0) {
745 memcpy(p, sep, seplen);
746 p += seplen;
747 reslen += seplen;
748 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000749 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000750 p += slen;
751 reslen += slen;
752 }
753 }
754 else {
755 for (i = 0; i < seqlen; i++) {
756 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000757 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000758 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000759 if (!PyString_Check(item)){
760 if (PyUnicode_Check(item)) {
761 Py_DECREF(res);
762 Py_DECREF(item);
763 return PyUnicode_Join(
764 (PyObject *)self,
765 seq);
766 }
767 Py_DECREF(item);
768 PyErr_Format(PyExc_TypeError,
769 "sequence item %i not a string",
770 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000771 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000772 }
773 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000774 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000775 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000776 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000777 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000778 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000779 sz *= 2;
780 p = PyString_AsString(res) + reslen;
781 }
782 if (i > 0) {
783 memcpy(p, sep, seplen);
784 p += seplen;
785 reslen += seplen;
786 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000787 memcpy(p, PyString_AS_STRING(item), slen);
788 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000789 p += slen;
790 reslen += slen;
791 }
792 }
793 if (_PyString_Resize(&res, reslen))
794 goto finally;
795 return res;
796
797 finally:
798 Py_DECREF(res);
799 return NULL;
800}
801
802
803
804static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000805string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000806 PyStringObject *self;
807 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000808 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000809{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000810 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000811 int len = PyString_GET_SIZE(self);
812 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000813 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000814
Guido van Rossumc6821402000-05-08 14:08:05 +0000815 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
816 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817 return -2;
818 if (PyString_Check(subobj)) {
819 sub = PyString_AS_STRING(subobj);
820 n = PyString_GET_SIZE(subobj);
821 }
822 else if (PyUnicode_Check(subobj))
823 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
824 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 return -2;
826
827 if (last > len)
828 last = len;
829 if (last < 0)
830 last += len;
831 if (last < 0)
832 last = 0;
833 if (i < 0)
834 i += len;
835 if (i < 0)
836 i = 0;
837
Guido van Rossum4c08d552000-03-10 22:55:18 +0000838 if (dir > 0) {
839 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000840 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000841 last -= n;
842 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000843 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000844 return (long)i;
845 }
846 else {
847 int j;
848
849 if (n == 0 && i <= last)
850 return (long)last;
851 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000852 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000853 return (long)j;
854 }
855
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000856 return -1;
857}
858
859
860static char find__doc__[] =
861"S.find(sub [,start [,end]]) -> int\n\
862\n\
863Return the lowest index in S where substring sub is found,\n\
864such that sub is contained within s[start,end]. Optional\n\
865arguments start and end are interpreted as in slice notation.\n\
866\n\
867Return -1 on failure.";
868
869static PyObject *
870string_find(self, args)
871 PyStringObject *self;
872 PyObject *args;
873{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000874 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875 if (result == -2)
876 return NULL;
877 return PyInt_FromLong(result);
878}
879
880
881static char index__doc__[] =
882"S.index(sub [,start [,end]]) -> int\n\
883\n\
884Like S.find() but raise ValueError when the substring is not found.";
885
886static PyObject *
887string_index(self, args)
888 PyStringObject *self;
889 PyObject *args;
890{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000891 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000892 if (result == -2)
893 return NULL;
894 if (result == -1) {
895 PyErr_SetString(PyExc_ValueError,
896 "substring not found in string.index");
897 return NULL;
898 }
899 return PyInt_FromLong(result);
900}
901
902
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000903static char rfind__doc__[] =
904"S.rfind(sub [,start [,end]]) -> int\n\
905\n\
906Return the highest index in S where substring sub is found,\n\
907such that sub is contained within s[start,end]. Optional\n\
908arguments start and end are interpreted as in slice notation.\n\
909\n\
910Return -1 on failure.";
911
912static PyObject *
913string_rfind(self, args)
914 PyStringObject *self;
915 PyObject *args;
916{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000917 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000918 if (result == -2)
919 return NULL;
920 return PyInt_FromLong(result);
921}
922
923
924static char rindex__doc__[] =
925"S.rindex(sub [,start [,end]]) -> int\n\
926\n\
927Like S.rfind() but raise ValueError when the substring is not found.";
928
929static PyObject *
930string_rindex(self, args)
931 PyStringObject *self;
932 PyObject *args;
933{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000934 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000935 if (result == -2)
936 return NULL;
937 if (result == -1) {
938 PyErr_SetString(PyExc_ValueError,
939 "substring not found in string.rindex");
940 return NULL;
941 }
942 return PyInt_FromLong(result);
943}
944
945
946static PyObject *
947do_strip(self, args, striptype)
948 PyStringObject *self;
949 PyObject *args;
950 int striptype;
951{
952 char *s = PyString_AS_STRING(self);
953 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000954
Guido van Rossum43713e52000-02-29 13:59:29 +0000955 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000956 return NULL;
957
958 i = 0;
959 if (striptype != RIGHTSTRIP) {
960 while (i < len && isspace(Py_CHARMASK(s[i]))) {
961 i++;
962 }
963 }
964
965 j = len;
966 if (striptype != LEFTSTRIP) {
967 do {
968 j--;
969 } while (j >= i && isspace(Py_CHARMASK(s[j])));
970 j++;
971 }
972
973 if (i == 0 && j == len) {
974 Py_INCREF(self);
975 return (PyObject*)self;
976 }
977 else
978 return PyString_FromStringAndSize(s+i, j-i);
979}
980
981
982static char strip__doc__[] =
983"S.strip() -> string\n\
984\n\
985Return a copy of the string S with leading and trailing\n\
986whitespace removed.";
987
988static PyObject *
989string_strip(self, args)
990 PyStringObject *self;
991 PyObject *args;
992{
993 return do_strip(self, args, BOTHSTRIP);
994}
995
996
997static char lstrip__doc__[] =
998"S.lstrip() -> string\n\
999\n\
1000Return a copy of the string S with leading whitespace removed.";
1001
1002static PyObject *
1003string_lstrip(self, args)
1004 PyStringObject *self;
1005 PyObject *args;
1006{
1007 return do_strip(self, args, LEFTSTRIP);
1008}
1009
1010
1011static char rstrip__doc__[] =
1012"S.rstrip() -> string\n\
1013\n\
1014Return a copy of the string S with trailing whitespace removed.";
1015
1016static PyObject *
1017string_rstrip(self, args)
1018 PyStringObject *self;
1019 PyObject *args;
1020{
1021 return do_strip(self, args, RIGHTSTRIP);
1022}
1023
1024
1025static char lower__doc__[] =
1026"S.lower() -> string\n\
1027\n\
1028Return a copy of the string S converted to lowercase.";
1029
1030static PyObject *
1031string_lower(self, args)
1032 PyStringObject *self;
1033 PyObject *args;
1034{
1035 char *s = PyString_AS_STRING(self), *s_new;
1036 int i, n = PyString_GET_SIZE(self);
1037 PyObject *new;
1038
Guido van Rossum43713e52000-02-29 13:59:29 +00001039 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001040 return NULL;
1041 new = PyString_FromStringAndSize(NULL, n);
1042 if (new == NULL)
1043 return NULL;
1044 s_new = PyString_AsString(new);
1045 for (i = 0; i < n; i++) {
1046 int c = Py_CHARMASK(*s++);
1047 if (isupper(c)) {
1048 *s_new = tolower(c);
1049 } else
1050 *s_new = c;
1051 s_new++;
1052 }
1053 return new;
1054}
1055
1056
1057static char upper__doc__[] =
1058"S.upper() -> string\n\
1059\n\
1060Return a copy of the string S converted to uppercase.";
1061
1062static PyObject *
1063string_upper(self, args)
1064 PyStringObject *self;
1065 PyObject *args;
1066{
1067 char *s = PyString_AS_STRING(self), *s_new;
1068 int i, n = PyString_GET_SIZE(self);
1069 PyObject *new;
1070
Guido van Rossum43713e52000-02-29 13:59:29 +00001071 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001072 return NULL;
1073 new = PyString_FromStringAndSize(NULL, n);
1074 if (new == NULL)
1075 return NULL;
1076 s_new = PyString_AsString(new);
1077 for (i = 0; i < n; i++) {
1078 int c = Py_CHARMASK(*s++);
1079 if (islower(c)) {
1080 *s_new = toupper(c);
1081 } else
1082 *s_new = c;
1083 s_new++;
1084 }
1085 return new;
1086}
1087
1088
Guido van Rossum4c08d552000-03-10 22:55:18 +00001089static char title__doc__[] =
1090"S.title() -> string\n\
1091\n\
1092Return a titlecased version of S, i.e. words start with uppercase\n\
1093characters, all remaining cased characters have lowercase.";
1094
1095static PyObject*
1096string_title(PyUnicodeObject *self, PyObject *args)
1097{
1098 char *s = PyString_AS_STRING(self), *s_new;
1099 int i, n = PyString_GET_SIZE(self);
1100 int previous_is_cased = 0;
1101 PyObject *new;
1102
1103 if (!PyArg_ParseTuple(args, ":title"))
1104 return NULL;
1105 new = PyString_FromStringAndSize(NULL, n);
1106 if (new == NULL)
1107 return NULL;
1108 s_new = PyString_AsString(new);
1109 for (i = 0; i < n; i++) {
1110 int c = Py_CHARMASK(*s++);
1111 if (islower(c)) {
1112 if (!previous_is_cased)
1113 c = toupper(c);
1114 previous_is_cased = 1;
1115 } else if (isupper(c)) {
1116 if (previous_is_cased)
1117 c = tolower(c);
1118 previous_is_cased = 1;
1119 } else
1120 previous_is_cased = 0;
1121 *s_new++ = c;
1122 }
1123 return new;
1124}
1125
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126static char capitalize__doc__[] =
1127"S.capitalize() -> string\n\
1128\n\
1129Return a copy of the string S with only its first character\n\
1130capitalized.";
1131
1132static PyObject *
1133string_capitalize(self, args)
1134 PyStringObject *self;
1135 PyObject *args;
1136{
1137 char *s = PyString_AS_STRING(self), *s_new;
1138 int i, n = PyString_GET_SIZE(self);
1139 PyObject *new;
1140
Guido van Rossum43713e52000-02-29 13:59:29 +00001141 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001142 return NULL;
1143 new = PyString_FromStringAndSize(NULL, n);
1144 if (new == NULL)
1145 return NULL;
1146 s_new = PyString_AsString(new);
1147 if (0 < n) {
1148 int c = Py_CHARMASK(*s++);
1149 if (islower(c))
1150 *s_new = toupper(c);
1151 else
1152 *s_new = c;
1153 s_new++;
1154 }
1155 for (i = 1; i < n; i++) {
1156 int c = Py_CHARMASK(*s++);
1157 if (isupper(c))
1158 *s_new = tolower(c);
1159 else
1160 *s_new = c;
1161 s_new++;
1162 }
1163 return new;
1164}
1165
1166
1167static char count__doc__[] =
1168"S.count(sub[, start[, end]]) -> int\n\
1169\n\
1170Return the number of occurrences of substring sub in string\n\
1171S[start:end]. Optional arguments start and end are\n\
1172interpreted as in slice notation.";
1173
1174static PyObject *
1175string_count(self, args)
1176 PyStringObject *self;
1177 PyObject *args;
1178{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001179 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180 int len = PyString_GET_SIZE(self), n;
1181 int i = 0, last = INT_MAX;
1182 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001183 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001184
Guido van Rossumc6821402000-05-08 14:08:05 +00001185 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1186 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001187 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001188
Guido van Rossum4c08d552000-03-10 22:55:18 +00001189 if (PyString_Check(subobj)) {
1190 sub = PyString_AS_STRING(subobj);
1191 n = PyString_GET_SIZE(subobj);
1192 }
1193 else if (PyUnicode_Check(subobj))
1194 return PyInt_FromLong(
1195 PyUnicode_Count((PyObject *)self, subobj, i, last));
1196 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1197 return NULL;
1198
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001199 if (last > len)
1200 last = len;
1201 if (last < 0)
1202 last += len;
1203 if (last < 0)
1204 last = 0;
1205 if (i < 0)
1206 i += len;
1207 if (i < 0)
1208 i = 0;
1209 m = last + 1 - n;
1210 if (n == 0)
1211 return PyInt_FromLong((long) (m-i));
1212
1213 r = 0;
1214 while (i < m) {
1215 if (!memcmp(s+i, sub, n)) {
1216 r++;
1217 i += n;
1218 } else {
1219 i++;
1220 }
1221 }
1222 return PyInt_FromLong((long) r);
1223}
1224
1225
1226static char swapcase__doc__[] =
1227"S.swapcase() -> string\n\
1228\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001229Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230converted to lowercase and vice versa.";
1231
1232static PyObject *
1233string_swapcase(self, args)
1234 PyStringObject *self;
1235 PyObject *args;
1236{
1237 char *s = PyString_AS_STRING(self), *s_new;
1238 int i, n = PyString_GET_SIZE(self);
1239 PyObject *new;
1240
Guido van Rossum43713e52000-02-29 13:59:29 +00001241 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242 return NULL;
1243 new = PyString_FromStringAndSize(NULL, n);
1244 if (new == NULL)
1245 return NULL;
1246 s_new = PyString_AsString(new);
1247 for (i = 0; i < n; i++) {
1248 int c = Py_CHARMASK(*s++);
1249 if (islower(c)) {
1250 *s_new = toupper(c);
1251 }
1252 else if (isupper(c)) {
1253 *s_new = tolower(c);
1254 }
1255 else
1256 *s_new = c;
1257 s_new++;
1258 }
1259 return new;
1260}
1261
1262
1263static char translate__doc__[] =
1264"S.translate(table [,deletechars]) -> string\n\
1265\n\
1266Return a copy of the string S, where all characters occurring\n\
1267in the optional argument deletechars are removed, and the\n\
1268remaining characters have been mapped through the given\n\
1269translation table, which must be a string of length 256.";
1270
1271static PyObject *
1272string_translate(self, args)
1273 PyStringObject *self;
1274 PyObject *args;
1275{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001276 register char *input, *output;
1277 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278 register int i, c, changed = 0;
1279 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001280 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 int inlen, tablen, dellen = 0;
1282 PyObject *result;
1283 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001284 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001285
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286 if (!PyArg_ParseTuple(args, "O|O:translate",
1287 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001289
1290 if (PyString_Check(tableobj)) {
1291 table1 = PyString_AS_STRING(tableobj);
1292 tablen = PyString_GET_SIZE(tableobj);
1293 }
1294 else if (PyUnicode_Check(tableobj)) {
1295 /* Unicode .translate() does not support the deletechars
1296 parameter; instead a mapping to None will cause characters
1297 to be deleted. */
1298 if (delobj != NULL) {
1299 PyErr_SetString(PyExc_TypeError,
1300 "deletions are implemented differently for unicode");
1301 return NULL;
1302 }
1303 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1304 }
1305 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307
1308 if (delobj != NULL) {
1309 if (PyString_Check(delobj)) {
1310 del_table = PyString_AS_STRING(delobj);
1311 dellen = PyString_GET_SIZE(delobj);
1312 }
1313 else if (PyUnicode_Check(delobj)) {
1314 PyErr_SetString(PyExc_TypeError,
1315 "deletions are implemented differently for unicode");
1316 return NULL;
1317 }
1318 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1319 return NULL;
1320
1321 if (tablen != 256) {
1322 PyErr_SetString(PyExc_ValueError,
1323 "translation table must be 256 characters long");
1324 return NULL;
1325 }
1326 }
1327 else {
1328 del_table = NULL;
1329 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 }
1331
1332 table = table1;
1333 inlen = PyString_Size(input_obj);
1334 result = PyString_FromStringAndSize((char *)NULL, inlen);
1335 if (result == NULL)
1336 return NULL;
1337 output_start = output = PyString_AsString(result);
1338 input = PyString_AsString(input_obj);
1339
1340 if (dellen == 0) {
1341 /* If no deletions are required, use faster code */
1342 for (i = inlen; --i >= 0; ) {
1343 c = Py_CHARMASK(*input++);
1344 if (Py_CHARMASK((*output++ = table[c])) != c)
1345 changed = 1;
1346 }
1347 if (changed)
1348 return result;
1349 Py_DECREF(result);
1350 Py_INCREF(input_obj);
1351 return input_obj;
1352 }
1353
1354 for (i = 0; i < 256; i++)
1355 trans_table[i] = Py_CHARMASK(table[i]);
1356
1357 for (i = 0; i < dellen; i++)
1358 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1359
1360 for (i = inlen; --i >= 0; ) {
1361 c = Py_CHARMASK(*input++);
1362 if (trans_table[c] != -1)
1363 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1364 continue;
1365 changed = 1;
1366 }
1367 if (!changed) {
1368 Py_DECREF(result);
1369 Py_INCREF(input_obj);
1370 return input_obj;
1371 }
1372 /* Fix the size of the resulting string */
1373 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1374 return NULL;
1375 return result;
1376}
1377
1378
1379/* What follows is used for implementing replace(). Perry Stoll. */
1380
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM of the
  contents of memory pointed to by PAT.  Returns the index into MEM if
  found, or -1 if not found.  If len of PAT is greater than length of
  MEM, the function returns -1.

  Neither buffer is modified, so both are taken as const.  PAT_LEN must
  be at least 1: the first byte of PAT is read unconditionally as a
  cheap pre-filter before memcmp.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    int ii;

    /* pattern can not occur in the last pat_len-1 chars */
    len -= pat_len;

    for (ii = 0; ii <= len; ii++) {
        if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
            return ii;
        }
    }
    return -1;
}
1410
/*
  mymemcnt

  Return the number of distinct (non-overlapping) times PAT is found
  in MEM, scanning left to right.
  meaning mem=1111 and pat==11 returns 2.
          mem=11111 and pat==11 also return 2.

  Neither buffer is modified, so both are taken as const.  PAT_LEN is
  expected to be at least 1, as required by mymemfind.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int offset = 0;
    int nfound = 0;

    while (len >= 0) {
        offset = mymemfind(mem, len, pat, pat_len);
        if (offset == -1)
            break;
        /* resume right after the match so occurrences never overlap */
        mem += offset + pat_len;
        len -= offset + pat_len;
        nfound++;
    }
    return nfound;
}
1438
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means no limit.

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR, then the original string is
   returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller releases it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failed or the result
       length does not fit in an int).

   The input buffers are never written to, so they are taken as const;
   STR is cast back to char * only for the "return input" case.
*/
static char *
mymemreplace(const char *str, int len,        /* input string */
             const char *pat, int pat_len,    /* pattern string to find */
             const char *sub, int sub_len,    /* substitution string */
             int count,                       /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Only growth can overflow: when delta is negative the matches
       themselves account for at least nfound*pat_len bytes of len. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound*delta;

    /* new_len is 0 when every byte belongs to a match and SUB is empty.
       A zero-byte request may legitimately return NULL, which would be
       mistaken for out-of-memory, so always ask for at least one byte. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str;
}
1521
1522
1523static char replace__doc__[] =
1524"S.replace (old, new[, maxsplit]) -> string\n\
1525\n\
1526Return a copy of string S with all occurrences of substring\n\
1527old replaced by new. If the optional argument maxsplit is\n\
1528given, only the first maxsplit occurrences are replaced.";
1529
1530static PyObject *
1531string_replace(self, args)
1532 PyStringObject *self;
1533 PyObject *args;
1534{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535 const char *str = PyString_AS_STRING(self), *sub, *repl;
1536 char *new_s;
1537 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1538 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001539 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001540 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001541
Guido van Rossum4c08d552000-03-10 22:55:18 +00001542 if (!PyArg_ParseTuple(args, "OO|i:replace",
1543 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001544 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001545
1546 if (PyString_Check(subobj)) {
1547 sub = PyString_AS_STRING(subobj);
1548 sub_len = PyString_GET_SIZE(subobj);
1549 }
1550 else if (PyUnicode_Check(subobj))
1551 return PyUnicode_Replace((PyObject *)self,
1552 subobj, replobj, count);
1553 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1554 return NULL;
1555
1556 if (PyString_Check(replobj)) {
1557 repl = PyString_AS_STRING(replobj);
1558 repl_len = PyString_GET_SIZE(replobj);
1559 }
1560 else if (PyUnicode_Check(replobj))
1561 return PyUnicode_Replace((PyObject *)self,
1562 subobj, replobj, count);
1563 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1564 return NULL;
1565
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001566 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001567 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568 return NULL;
1569 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001570 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 if (new_s == NULL) {
1572 PyErr_NoMemory();
1573 return NULL;
1574 }
1575 if (out_len == -1) {
1576 /* we're returning another reference to self */
1577 new = (PyObject*)self;
1578 Py_INCREF(new);
1579 }
1580 else {
1581 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001582 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583 }
1584 return new;
1585}
1586
1587
1588static char startswith__doc__[] =
1589"S.startswith(prefix[, start[, end]]) -> int\n\
1590\n\
1591Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1592optional start, test S beginning at that position. With optional end, stop\n\
1593comparing S at that position.";
1594
1595static PyObject *
1596string_startswith(self, args)
1597 PyStringObject *self;
1598 PyObject *args;
1599{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001600 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001602 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603 int plen;
1604 int start = 0;
1605 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607
Guido van Rossumc6821402000-05-08 14:08:05 +00001608 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1609 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001610 return NULL;
1611 if (PyString_Check(subobj)) {
1612 prefix = PyString_AS_STRING(subobj);
1613 plen = PyString_GET_SIZE(subobj);
1614 }
1615 else if (PyUnicode_Check(subobj))
1616 return PyInt_FromLong(
1617 PyUnicode_Tailmatch((PyObject *)self,
1618 subobj, start, end, -1));
1619 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620 return NULL;
1621
1622 /* adopt Java semantics for index out of range. it is legal for
1623 * offset to be == plen, but this only returns true if prefix is
1624 * the empty string.
1625 */
1626 if (start < 0 || start+plen > len)
1627 return PyInt_FromLong(0);
1628
1629 if (!memcmp(str+start, prefix, plen)) {
1630 /* did the match end after the specified end? */
1631 if (end < 0)
1632 return PyInt_FromLong(1);
1633 else if (end - start < plen)
1634 return PyInt_FromLong(0);
1635 else
1636 return PyInt_FromLong(1);
1637 }
1638 else return PyInt_FromLong(0);
1639}
1640
1641
1642static char endswith__doc__[] =
1643"S.endswith(suffix[, start[, end]]) -> int\n\
1644\n\
1645Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1646optional start, test S beginning at that position. With optional end, stop\n\
1647comparing S at that position.";
1648
1649static PyObject *
1650string_endswith(self, args)
1651 PyStringObject *self;
1652 PyObject *args;
1653{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001654 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001656 const char* suffix;
1657 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 int start = 0;
1659 int end = -1;
1660 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001661 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662
Guido van Rossumc6821402000-05-08 14:08:05 +00001663 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1664 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665 return NULL;
1666 if (PyString_Check(subobj)) {
1667 suffix = PyString_AS_STRING(subobj);
1668 slen = PyString_GET_SIZE(subobj);
1669 }
1670 else if (PyUnicode_Check(subobj))
1671 return PyInt_FromLong(
1672 PyUnicode_Tailmatch((PyObject *)self,
1673 subobj, start, end, +1));
1674 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675 return NULL;
1676
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678 return PyInt_FromLong(0);
1679
1680 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684 return PyInt_FromLong(1);
1685 else return PyInt_FromLong(0);
1686}
1687
1688
Guido van Rossum4c08d552000-03-10 22:55:18 +00001689static char expandtabs__doc__[] =
1690"S.expandtabs([tabsize]) -> string\n\
1691\n\
1692Return a copy of S where all tab characters are expanded using spaces.\n\
1693If tabsize is not given, a tab size of 8 characters is assumed.";
1694
1695static PyObject*
1696string_expandtabs(PyStringObject *self, PyObject *args)
1697{
1698 const char *e, *p;
1699 char *q;
1700 int i, j;
1701 PyObject *u;
1702 int tabsize = 8;
1703
1704 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1705 return NULL;
1706
1707 /* First pass: determine size of ouput string */
1708 i = j = 0;
1709 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1710 for (p = PyString_AS_STRING(self); p < e; p++)
1711 if (*p == '\t') {
1712 if (tabsize > 0)
1713 j += tabsize - (j % tabsize);
1714 }
1715 else {
1716 j++;
1717 if (*p == '\n' || *p == '\r') {
1718 i += j;
1719 j = 0;
1720 }
1721 }
1722
1723 /* Second pass: create output string and fill it */
1724 u = PyString_FromStringAndSize(NULL, i + j);
1725 if (!u)
1726 return NULL;
1727
1728 j = 0;
1729 q = PyString_AS_STRING(u);
1730
1731 for (p = PyString_AS_STRING(self); p < e; p++)
1732 if (*p == '\t') {
1733 if (tabsize > 0) {
1734 i = tabsize - (j % tabsize);
1735 j += i;
1736 while (i--)
1737 *q++ = ' ';
1738 }
1739 }
1740 else {
1741 j++;
1742 *q++ = *p;
1743 if (*p == '\n' || *p == '\r')
1744 j = 0;
1745 }
1746
1747 return u;
1748}
1749
1750static
1751PyObject *pad(PyStringObject *self,
1752 int left,
1753 int right,
1754 char fill)
1755{
1756 PyObject *u;
1757
1758 if (left < 0)
1759 left = 0;
1760 if (right < 0)
1761 right = 0;
1762
1763 if (left == 0 && right == 0) {
1764 Py_INCREF(self);
1765 return (PyObject *)self;
1766 }
1767
1768 u = PyString_FromStringAndSize(NULL,
1769 left + PyString_GET_SIZE(self) + right);
1770 if (u) {
1771 if (left)
1772 memset(PyString_AS_STRING(u), fill, left);
1773 memcpy(PyString_AS_STRING(u) + left,
1774 PyString_AS_STRING(self),
1775 PyString_GET_SIZE(self));
1776 if (right)
1777 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1778 fill, right);
1779 }
1780
1781 return u;
1782}
1783
1784static char ljust__doc__[] =
1785"S.ljust(width) -> string\n\
1786\n\
1787Return S left justified in a string of length width. Padding is\n\
1788done using spaces.";
1789
1790static PyObject *
1791string_ljust(PyStringObject *self, PyObject *args)
1792{
1793 int width;
1794 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1795 return NULL;
1796
1797 if (PyString_GET_SIZE(self) >= width) {
1798 Py_INCREF(self);
1799 return (PyObject*) self;
1800 }
1801
1802 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1803}
1804
1805
1806static char rjust__doc__[] =
1807"S.rjust(width) -> string\n\
1808\n\
1809Return S right justified in a string of length width. Padding is\n\
1810done using spaces.";
1811
1812static PyObject *
1813string_rjust(PyStringObject *self, PyObject *args)
1814{
1815 int width;
1816 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1817 return NULL;
1818
1819 if (PyString_GET_SIZE(self) >= width) {
1820 Py_INCREF(self);
1821 return (PyObject*) self;
1822 }
1823
1824 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1825}
1826
1827
1828static char center__doc__[] =
1829"S.center(width) -> string\n\
1830\n\
1831Return S centered in a string of length width. Padding is done\n\
1832using spaces.";
1833
1834static PyObject *
1835string_center(PyStringObject *self, PyObject *args)
1836{
1837 int marg, left;
1838 int width;
1839
1840 if (!PyArg_ParseTuple(args, "i:center", &width))
1841 return NULL;
1842
1843 if (PyString_GET_SIZE(self) >= width) {
1844 Py_INCREF(self);
1845 return (PyObject*) self;
1846 }
1847
1848 marg = width - PyString_GET_SIZE(self);
1849 left = marg / 2 + (marg & width & 1);
1850
1851 return pad(self, left, marg - left, ' ');
1852}
1853
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad with '0' up to `width` characters, keeping a
   leading '+' or '-' in front of the inserted zeros.  (Compiled out.) */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int nzeros;
    PyObject *padded;
    char *chars;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    nzeros = width - PyString_GET_SIZE(self);

    padded = pad(self, nzeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* chars[nzeros] is the first character of the original string */
    chars = PyString_AS_STRING(padded);
    switch (chars[nzeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        chars[0] = chars[nzeros];
        chars[nzeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1893
1894static char isspace__doc__[] =
1895"S.isspace() -> int\n\
1896\n\
1897Return 1 if there are only whitespace characters in S,\n\
18980 otherwise.";
1899
1900static PyObject*
1901string_isspace(PyStringObject *self, PyObject *args)
1902{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001903 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1904 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001905
1906 if (!PyArg_NoArgs(args))
1907 return NULL;
1908
1909 /* Shortcut for single character strings */
1910 if (PyString_GET_SIZE(self) == 1 &&
1911 isspace(*p))
1912 return PyInt_FromLong(1);
1913
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001914 /* Special case for empty strings */
1915 if (PyString_GET_SIZE(self) == 0)
1916 return PyInt_FromLong(0);
1917
Guido van Rossum4c08d552000-03-10 22:55:18 +00001918 e = p + PyString_GET_SIZE(self);
1919 for (; p < e; p++) {
1920 if (!isspace(*p))
1921 return PyInt_FromLong(0);
1922 }
1923 return PyInt_FromLong(1);
1924}
1925
1926
1927static char isdigit__doc__[] =
1928"S.isdigit() -> int\n\
1929\n\
1930Return 1 if there are only digit characters in S,\n\
19310 otherwise.";
1932
1933static PyObject*
1934string_isdigit(PyStringObject *self, PyObject *args)
1935{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001936 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1937 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001938
1939 if (!PyArg_NoArgs(args))
1940 return NULL;
1941
1942 /* Shortcut for single character strings */
1943 if (PyString_GET_SIZE(self) == 1 &&
1944 isdigit(*p))
1945 return PyInt_FromLong(1);
1946
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001947 /* Special case for empty strings */
1948 if (PyString_GET_SIZE(self) == 0)
1949 return PyInt_FromLong(0);
1950
Guido van Rossum4c08d552000-03-10 22:55:18 +00001951 e = p + PyString_GET_SIZE(self);
1952 for (; p < e; p++) {
1953 if (!isdigit(*p))
1954 return PyInt_FromLong(0);
1955 }
1956 return PyInt_FromLong(1);
1957}
1958
1959
1960static char islower__doc__[] =
1961"S.islower() -> int\n\
1962\n\
1963Return 1 if all cased characters in S are lowercase and there is\n\
1964at least one cased character in S, 0 otherwise.";
1965
1966static PyObject*
1967string_islower(PyStringObject *self, PyObject *args)
1968{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001969 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1970 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001971 int cased;
1972
1973 if (!PyArg_NoArgs(args))
1974 return NULL;
1975
1976 /* Shortcut for single character strings */
1977 if (PyString_GET_SIZE(self) == 1)
1978 return PyInt_FromLong(islower(*p) != 0);
1979
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001980 /* Special case for empty strings */
1981 if (PyString_GET_SIZE(self) == 0)
1982 return PyInt_FromLong(0);
1983
Guido van Rossum4c08d552000-03-10 22:55:18 +00001984 e = p + PyString_GET_SIZE(self);
1985 cased = 0;
1986 for (; p < e; p++) {
1987 if (isupper(*p))
1988 return PyInt_FromLong(0);
1989 else if (!cased && islower(*p))
1990 cased = 1;
1991 }
1992 return PyInt_FromLong(cased);
1993}
1994
1995
1996static char isupper__doc__[] =
1997"S.isupper() -> int\n\
1998\n\
1999Return 1 if all cased characters in S are uppercase and there is\n\
2000at least one cased character in S, 0 otherwise.";
2001
2002static PyObject*
2003string_isupper(PyStringObject *self, PyObject *args)
2004{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002005 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2006 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002007 int cased;
2008
2009 if (!PyArg_NoArgs(args))
2010 return NULL;
2011
2012 /* Shortcut for single character strings */
2013 if (PyString_GET_SIZE(self) == 1)
2014 return PyInt_FromLong(isupper(*p) != 0);
2015
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002016 /* Special case for empty strings */
2017 if (PyString_GET_SIZE(self) == 0)
2018 return PyInt_FromLong(0);
2019
Guido van Rossum4c08d552000-03-10 22:55:18 +00002020 e = p + PyString_GET_SIZE(self);
2021 cased = 0;
2022 for (; p < e; p++) {
2023 if (islower(*p))
2024 return PyInt_FromLong(0);
2025 else if (!cased && isupper(*p))
2026 cased = 1;
2027 }
2028 return PyInt_FromLong(cased);
2029}
2030
2031
2032static char istitle__doc__[] =
2033"S.istitle() -> int\n\
2034\n\
2035Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2036may only follow uncased characters and lowercase characters only cased\n\
2037ones. Return 0 otherwise.";
2038
2039static PyObject*
2040string_istitle(PyStringObject *self, PyObject *args)
2041{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002042 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2043 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 int cased, previous_is_cased;
2045
2046 if (!PyArg_NoArgs(args))
2047 return NULL;
2048
2049 /* Shortcut for single character strings */
2050 if (PyString_GET_SIZE(self) == 1)
2051 return PyInt_FromLong(isupper(*p) != 0);
2052
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002053 /* Special case for empty strings */
2054 if (PyString_GET_SIZE(self) == 0)
2055 return PyInt_FromLong(0);
2056
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057 e = p + PyString_GET_SIZE(self);
2058 cased = 0;
2059 previous_is_cased = 0;
2060 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002061 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002062
2063 if (isupper(ch)) {
2064 if (previous_is_cased)
2065 return PyInt_FromLong(0);
2066 previous_is_cased = 1;
2067 cased = 1;
2068 }
2069 else if (islower(ch)) {
2070 if (!previous_is_cased)
2071 return PyInt_FromLong(0);
2072 previous_is_cased = 1;
2073 cased = 1;
2074 }
2075 else
2076 previous_is_cased = 0;
2077 }
2078 return PyInt_FromLong(cased);
2079}
2080
2081
/* Documentation string for S.splitlines(); keepends is optional. */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088
2089#define SPLIT_APPEND(data, left, right) \
2090 str = PyString_FromStringAndSize(data + left, right - left); \
2091 if (!str) \
2092 goto onError; \
2093 if (PyList_Append(list, str)) { \
2094 Py_DECREF(str); \
2095 goto onError; \
2096 } \
2097 else \
2098 Py_DECREF(str);
2099
2100static PyObject*
2101string_splitlines(PyStringObject *self, PyObject *args)
2102{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002103 register int i;
2104 register int j;
2105 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002106 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 PyObject *list;
2108 PyObject *str;
2109 char *data;
2110
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002111 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002112 return NULL;
2113
2114 data = PyString_AS_STRING(self);
2115 len = PyString_GET_SIZE(self);
2116
Guido van Rossum4c08d552000-03-10 22:55:18 +00002117 list = PyList_New(0);
2118 if (!list)
2119 goto onError;
2120
2121 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002122 int eol;
2123
Guido van Rossum4c08d552000-03-10 22:55:18 +00002124 /* Find a line and append it */
2125 while (i < len && data[i] != '\n' && data[i] != '\r')
2126 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002127
2128 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002129 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130 if (i < len) {
2131 if (data[i] == '\r' && i + 1 < len &&
2132 data[i+1] == '\n')
2133 i += 2;
2134 else
2135 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002136 if (keepends)
2137 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002139 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002140 j = i;
2141 }
2142 if (j < len) {
2143 SPLIT_APPEND(data, j, len);
2144 }
2145
2146 return list;
2147
2148 onError:
2149 Py_DECREF(list);
2150 return NULL;
2151}
2152
2153#undef SPLIT_APPEND
2154
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155
/* Method table for string objects; string_getattr() hands it to
   Py_FindMethod() to resolve attribute lookups.

   Each entry is {name, C handler, flags, doc string}.  Among the handlers
   visible in this part of the file, the entries flagged 1 parse their
   arguments with PyArg_ParseTuple() and the entries flagged 0 (the is*()
   predicates) check for an empty argument list with PyArg_NoArgs().
   The table ends with a NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, 1, join__doc__},
    {"split", (PyCFunction)string_split, 1, split__doc__},
    {"lower", (PyCFunction)string_lower, 1, lower__doc__},
    {"upper", (PyCFunction)string_upper, 1, upper__doc__},
    {"islower", (PyCFunction)string_islower, 0, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
    {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
    {"count", (PyCFunction)string_count, 1, count__doc__},
    {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
    {"find", (PyCFunction)string_find, 1, find__doc__},
    {"index", (PyCFunction)string_index, 1, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, 1, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
    {"strip", (PyCFunction)string_strip, 1, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
    {"translate", (PyCFunction)string_translate, 1, translate__doc__},
    {"title", (PyCFunction)string_title, 1, title__doc__},
    {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
    {"center", (PyCFunction)string_center, 1, center__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
    /* zfill is compiled out together with its implementation */
#if 0
    {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
#endif
    {NULL, NULL} /* sentinel */
};
2194
2195static PyObject *
2196string_getattr(s, name)
2197 PyStringObject *s;
2198 char *name;
2199{
2200 return Py_FindMethod(string_methods, (PyObject*)s, name);
2201}
2202
2203
/* Type object for strings.  The initializer is positional, so the order of
   the entries is significant.  NOTE(review): the four unlabelled leading
   entries appear to be the header's item count, the type name, the fixed
   object size and the per-character item size -- confirm against the
   PyTypeObject declaration.  Slots set to 0 are not provided by this
   type. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,
    "string",
    sizeof(PyStringObject),
    sizeof(char),
    (destructor)string_dealloc, /*tp_dealloc*/
    (printfunc)string_print, /*tp_print*/
    (getattrfunc)string_getattr, /*tp_getattr*/
    0, /*tp_setattr*/
    (cmpfunc)string_compare, /*tp_compare*/
    (reprfunc)string_repr, /*tp_repr*/
    0, /*tp_as_number*/
    &string_as_sequence, /*tp_as_sequence*/
    0, /*tp_as_mapping*/
    (hashfunc)string_hash, /*tp_hash*/
    0, /*tp_call*/
    0, /*tp_str*/
    0, /*tp_getattro*/
    0, /*tp_setattro*/
    &string_as_buffer, /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT, /*tp_flags*/
    0, /*tp_doc*/
};
2228
2229void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002230PyString_Concat(pv, w)
2231 register PyObject **pv;
2232 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002233{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002234 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002235 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002236 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002237 if (w == NULL || !PyString_Check(*pv)) {
2238 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002239 *pv = NULL;
2240 return;
2241 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002242 v = string_concat((PyStringObject *) *pv, w);
2243 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002244 *pv = v;
2245}
2246
Guido van Rossum013142a1994-08-30 08:19:36 +00002247void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002248PyString_ConcatAndDel(pv, w)
2249 register PyObject **pv;
2250 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002251{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002252 PyString_Concat(pv, w);
2253 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002254}
2255
2256
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002257/* The following function breaks the notion that strings are immutable:
2258 it changes the size of a string. We get away with this only if there
2259 is only one module referencing the object. You can also think of it
2260 as creating a new string object and destroying the old one, only
2261 more efficiently. In any case, don't use this if the string may
2262 already be known to some other part of the code... */
2263
2264int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002265_PyString_Resize(pv, newsize)
2266 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002267 int newsize;
2268{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002269 register PyObject *v;
2270 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002271 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002272 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002273 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002274 Py_DECREF(v);
2275 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002276 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002277 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002278 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002279#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002280 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002281#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002282 _Py_ForgetReference(v);
2283 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002284 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002285 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002286 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002287 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002288 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002289 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002290 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002291 _Py_NewReference(*pv);
2292 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002293 sv->ob_size = newsize;
2294 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002295 return 0;
2296}
Guido van Rossume5372401993-03-16 12:15:04 +00002297
2298/* Helpers for formatstring */
2299
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002300static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002301getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002302 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002303 int arglen;
2304 int *p_argidx;
2305{
2306 int argidx = *p_argidx;
2307 if (argidx < arglen) {
2308 (*p_argidx)++;
2309 if (arglen < 0)
2310 return args;
2311 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002312 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002313 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002314 PyErr_SetString(PyExc_TypeError,
2315 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002316 return NULL;
2317}
2318
/* Flag bits collected by PyString_Format() while it reads the flag
   characters of a format specifier; one distinct bit per character. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
2324
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002325static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002326formatfloat(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002327 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002328 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002329 int flags;
2330 int prec;
2331 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002332 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002333{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002334 /* fmt = '%#.' + `prec` + `type`
2335 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002336 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002337 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002338 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002339 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002340 if (prec < 0)
2341 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002342 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2343 type = 'g';
2344 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002345 /* worst case length calc to ensure no buffer overrun:
2346 fmt = %#.<prec>g
2347 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2348 for any double rep.)
2349 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2350 If prec=0 the effective precision is 1 (the leading digit is
2351 always given), therefore increase by one to 10+prec. */
2352 if (buflen <= (size_t)10 + (size_t)prec) {
2353 PyErr_SetString(PyExc_OverflowError,
2354 "formatted float is too long (precision too long?)");
2355 return -1;
2356 }
Guido van Rossume5372401993-03-16 12:15:04 +00002357 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002358 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002359}
2360
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002361static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002362formatint(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002363 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002364 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002365 int flags;
2366 int prec;
2367 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002368 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002369{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002370 /* fmt = '%#.' + `prec` + 'l' + `type`
2371 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002372 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002373 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002374 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002375 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002376 if (prec < 0)
2377 prec = 1;
2378 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002379 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2380 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2381 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2382 PyErr_SetString(PyExc_OverflowError,
2383 "formatted integer is too long (precision too long?)");
2384 return -1;
2385 }
Guido van Rossume5372401993-03-16 12:15:04 +00002386 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002387 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002388}
2389
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002390static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002391formatchar(buf, buflen, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002392 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002393 size_t buflen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002394 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002395{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002396 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002397 if (PyString_Check(v)) {
2398 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002399 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002400 }
2401 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002402 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002403 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002404 }
2405 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002406 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002407}
2408
Guido van Rossum013142a1994-08-30 08:19:36 +00002409
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. after sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002419
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002420PyObject *
2421PyString_Format(format, args)
2422 PyObject *format;
2423 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002424{
2425 char *fmt, *res;
2426 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002427 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002428 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002429 PyObject *dict = NULL;
2430 if (format == NULL || !PyString_Check(format) || args == NULL) {
2431 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002432 return NULL;
2433 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002434 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002435 fmt = PyString_AsString(format);
2436 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002437 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002438 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002439 if (result == NULL)
2440 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002441 res = PyString_AsString(result);
2442 if (PyTuple_Check(args)) {
2443 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002444 argidx = 0;
2445 }
2446 else {
2447 arglen = -1;
2448 argidx = -2;
2449 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002450 if (args->ob_type->tp_as_mapping)
2451 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002452 while (--fmtcnt >= 0) {
2453 if (*fmt != '%') {
2454 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002455 rescnt = fmtcnt + 100;
2456 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002457 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002458 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002459 res = PyString_AsString(result)
2460 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002461 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002462 }
2463 *res++ = *fmt++;
2464 }
2465 else {
2466 /* Got a format specifier */
2467 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002468 int width = -1;
2469 int prec = -1;
2470 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002471 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002472 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002473 PyObject *v = NULL;
2474 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002475 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002476 int sign;
2477 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002478 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002479 char *fmt_start = fmt;
2480
Guido van Rossumda9c2711996-12-05 21:58:58 +00002481 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002482 if (*fmt == '(') {
2483 char *keystart;
2484 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002485 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002486 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002487
2488 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002489 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002490 "format requires a mapping");
2491 goto error;
2492 }
2493 ++fmt;
2494 --fmtcnt;
2495 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002496 /* Skip over balanced parentheses */
2497 while (pcount > 0 && --fmtcnt >= 0) {
2498 if (*fmt == ')')
2499 --pcount;
2500 else if (*fmt == '(')
2501 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002502 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002503 }
2504 keylen = fmt - keystart - 1;
2505 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002506 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002507 "incomplete format key");
2508 goto error;
2509 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002510 key = PyString_FromStringAndSize(keystart,
2511 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002512 if (key == NULL)
2513 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002514 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002515 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002516 args_owned = 0;
2517 }
2518 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002519 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002520 if (args == NULL) {
2521 goto error;
2522 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002523 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002524 arglen = -1;
2525 argidx = -2;
2526 }
Guido van Rossume5372401993-03-16 12:15:04 +00002527 while (--fmtcnt >= 0) {
2528 switch (c = *fmt++) {
2529 case '-': flags |= F_LJUST; continue;
2530 case '+': flags |= F_SIGN; continue;
2531 case ' ': flags |= F_BLANK; continue;
2532 case '#': flags |= F_ALT; continue;
2533 case '0': flags |= F_ZERO; continue;
2534 }
2535 break;
2536 }
2537 if (c == '*') {
2538 v = getnextarg(args, arglen, &argidx);
2539 if (v == NULL)
2540 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002541 if (!PyInt_Check(v)) {
2542 PyErr_SetString(PyExc_TypeError,
2543 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002544 goto error;
2545 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002546 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002547 if (width < 0) {
2548 flags |= F_LJUST;
2549 width = -width;
2550 }
Guido van Rossume5372401993-03-16 12:15:04 +00002551 if (--fmtcnt >= 0)
2552 c = *fmt++;
2553 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002554 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002555 width = c - '0';
2556 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002557 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002558 if (!isdigit(c))
2559 break;
2560 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002561 PyErr_SetString(
2562 PyExc_ValueError,
2563 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002564 goto error;
2565 }
2566 width = width*10 + (c - '0');
2567 }
2568 }
2569 if (c == '.') {
2570 prec = 0;
2571 if (--fmtcnt >= 0)
2572 c = *fmt++;
2573 if (c == '*') {
2574 v = getnextarg(args, arglen, &argidx);
2575 if (v == NULL)
2576 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002577 if (!PyInt_Check(v)) {
2578 PyErr_SetString(
2579 PyExc_TypeError,
2580 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002581 goto error;
2582 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002583 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002584 if (prec < 0)
2585 prec = 0;
2586 if (--fmtcnt >= 0)
2587 c = *fmt++;
2588 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002589 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002590 prec = c - '0';
2591 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002592 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002593 if (!isdigit(c))
2594 break;
2595 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002596 PyErr_SetString(
2597 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002598 "prec too big");
2599 goto error;
2600 }
2601 prec = prec*10 + (c - '0');
2602 }
2603 }
2604 } /* prec */
2605 if (fmtcnt >= 0) {
2606 if (c == 'h' || c == 'l' || c == 'L') {
2607 size = c;
2608 if (--fmtcnt >= 0)
2609 c = *fmt++;
2610 }
2611 }
2612 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002613 PyErr_SetString(PyExc_ValueError,
2614 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002615 goto error;
2616 }
2617 if (c != '%') {
2618 v = getnextarg(args, arglen, &argidx);
2619 if (v == NULL)
2620 goto error;
2621 }
2622 sign = 0;
2623 fill = ' ';
2624 switch (c) {
2625 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002626 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002627 len = 1;
2628 break;
2629 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002630 case 'r':
2631 if (PyUnicode_Check(v)) {
2632 fmt = fmt_start;
2633 goto unicode;
2634 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002635 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002636 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002637 else
2638 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002639 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002640 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002641 if (!PyString_Check(temp)) {
2642 PyErr_SetString(PyExc_TypeError,
2643 "%s argument has non-string str()");
2644 goto error;
2645 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002646 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002647 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002648 if (prec >= 0 && len > prec)
2649 len = prec;
2650 break;
2651 case 'i':
2652 case 'd':
2653 case 'u':
2654 case 'o':
2655 case 'x':
2656 case 'X':
2657 if (c == 'i')
2658 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002659 pbuf = formatbuf;
2660 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002661 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002662 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002663 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002664 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002665 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002666 if ((flags&F_ALT) &&
2667 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002668 pbuf[0] == '0' && pbuf[1] == c) {
2669 *res++ = *pbuf++;
2670 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002671 rescnt -= 2;
2672 len -= 2;
2673 width -= 2;
2674 if (width < 0)
2675 width = 0;
2676 }
2677 }
Guido van Rossume5372401993-03-16 12:15:04 +00002678 break;
2679 case 'e':
2680 case 'E':
2681 case 'f':
2682 case 'g':
2683 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002684 pbuf = formatbuf;
2685 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002686 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002687 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002688 sign = 1;
2689 if (flags&F_ZERO)
2690 fill = '0';
2691 break;
2692 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002693 pbuf = formatbuf;
2694 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002695 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002696 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002697 break;
2698 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002699 PyErr_Format(PyExc_ValueError,
2700 "unsupported format character '%c' (0x%x)",
2701 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002702 goto error;
2703 }
2704 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002705 if (*pbuf == '-' || *pbuf == '+') {
2706 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002707 len--;
2708 }
2709 else if (flags & F_SIGN)
2710 sign = '+';
2711 else if (flags & F_BLANK)
2712 sign = ' ';
2713 else
2714 sign = '\0';
2715 }
2716 if (width < len)
2717 width = len;
2718 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002719 reslen -= rescnt;
2720 rescnt = width + fmtcnt + 100;
2721 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002722 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002723 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002724 res = PyString_AsString(result)
2725 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002726 }
2727 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002728 if (fill != ' ')
2729 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002730 rescnt--;
2731 if (width > len)
2732 width--;
2733 }
2734 if (width > len && !(flags&F_LJUST)) {
2735 do {
2736 --rescnt;
2737 *res++ = fill;
2738 } while (--width > len);
2739 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002740 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002741 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002742 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002743 res += len;
2744 rescnt -= len;
2745 while (--width >= len) {
2746 --rescnt;
2747 *res++ = ' ';
2748 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002749 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002750 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002751 "not all arguments converted");
2752 goto error;
2753 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002754 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002755 } /* '%' */
2756 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002757 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002758 PyErr_SetString(PyExc_TypeError,
2759 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002760 goto error;
2761 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002762 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002763 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002764 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002765 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002766 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002767
2768 unicode:
2769 if (args_owned) {
2770 Py_DECREF(args);
2771 args_owned = 0;
2772 }
2773 /* Fiddle args right (remove the first argidx-1 arguments) */
2774 --argidx;
2775 if (PyTuple_Check(orig_args) && argidx > 0) {
2776 PyObject *v;
2777 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2778 v = PyTuple_New(n);
2779 if (v == NULL)
2780 goto error;
2781 while (--n >= 0) {
2782 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2783 Py_INCREF(w);
2784 PyTuple_SET_ITEM(v, n, w);
2785 }
2786 args = v;
2787 } else {
2788 Py_INCREF(orig_args);
2789 args = orig_args;
2790 }
2791 /* Paste rest of format string to what we have of the result
2792 string; we reuse result for this */
2793 rescnt = res - PyString_AS_STRING(result);
2794 fmtcnt = PyString_GET_SIZE(format) - \
2795 (fmt - PyString_AS_STRING(format));
2796 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2797 Py_DECREF(args);
2798 goto error;
2799 }
2800 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2801 format = result;
2802 /* Let Unicode do its magic */
2803 result = PyUnicode_Format(format, args);
2804 Py_DECREF(format);
2805 Py_DECREF(args);
2806 return result;
2807
Guido van Rossume5372401993-03-16 12:15:04 +00002808 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002809 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002810 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002811 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002812 }
Guido van Rossume5372401993-03-16 12:15:04 +00002813 return NULL;
2814}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002815
2816
2817#ifdef INTERN_STRINGS
2818
2819static PyObject *interned;
2820
2821void
2822PyString_InternInPlace(p)
2823 PyObject **p;
2824{
2825 register PyStringObject *s = (PyStringObject *)(*p);
2826 PyObject *t;
2827 if (s == NULL || !PyString_Check(s))
2828 Py_FatalError("PyString_InternInPlace: strings only please!");
2829 if ((t = s->ob_sinterned) != NULL) {
2830 if (t == (PyObject *)s)
2831 return;
2832 Py_INCREF(t);
2833 *p = t;
2834 Py_DECREF(s);
2835 return;
2836 }
2837 if (interned == NULL) {
2838 interned = PyDict_New();
2839 if (interned == NULL)
2840 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002841 }
2842 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2843 Py_INCREF(t);
2844 *p = s->ob_sinterned = t;
2845 Py_DECREF(s);
2846 return;
2847 }
2848 t = (PyObject *)s;
2849 if (PyDict_SetItem(interned, t, t) == 0) {
2850 s->ob_sinterned = t;
2851 return;
2852 }
2853 PyErr_Clear();
2854}
2855
2856
2857PyObject *
2858PyString_InternFromString(cp)
2859 const char *cp;
2860{
2861 PyObject *s = PyString_FromString(cp);
2862 if (s == NULL)
2863 return NULL;
2864 PyString_InternInPlace(&s);
2865 return s;
2866}
2867
2868#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002869
2870void
2871PyString_Fini()
2872{
2873 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002874 for (i = 0; i < UCHAR_MAX + 1; i++) {
2875 Py_XDECREF(characters[i]);
2876 characters[i] = NULL;
2877 }
2878#ifndef DONT_SHARE_SHORT_STRINGS
2879 Py_XDECREF(nullstring);
2880 nullstring = NULL;
2881#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002882#ifdef INTERN_STRINGS
2883 if (interned) {
2884 int pos, changed;
2885 PyObject *key, *value;
2886 do {
2887 changed = 0;
2888 pos = 0;
2889 while (PyDict_Next(interned, &pos, &key, &value)) {
2890 if (key->ob_refcnt == 2 && key == value) {
2891 PyDict_DelItem(interned, key);
2892 changed = 1;
2893 }
2894 }
2895 } while (changed);
2896 }
2897#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002898}