blob: 3b7fcde09ac8d0a4075ca936af106f5d4ff07315 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001/***********************************************************
Guido van Rossum6610ad91995-01-04 19:07:38 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossumf70e43a1991-02-19 12:39:46 +00004
5 All Rights Reserved
6
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007Copyright (c) 2000, BeOpen.com.
8Copyright (c) 1995-2000, Corporation for National Research Initiatives.
9Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
10All rights reserved.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000011
Guido van Rossumfd71b9e2000-06-30 23:50:40 +000012See the file "Misc/COPYRIGHT" for information on usage and
13redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossumf70e43a1991-02-19 12:39:46 +000014
15******************************************************************/
16
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000017/* String object implementation */
18
Guido van Rossumc0b618a1997-05-02 03:12:38 +000019#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000020
Guido van Rossum71160aa1997-06-03 18:03:18 +000021#include "mymath.h"
Guido van Rossum013142a1994-08-30 08:19:36 +000022#include <ctype.h>
23
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time the shared empty string or a
   shared one-character string is handed out instead of allocating. */
int null_strings, one_strings;
#endif

/* Integer limits.  Without <limits.h> we must supply the constants this
   file relies on ourselves.  INT_MAX is used by the overflow checks in
   this file just like UCHAR_MAX is used for the character cache, so a
   fallback is needed for both (the INT_MAX value assumes a 32-bit int). */
#ifdef HAVE_LIMITS_H
#include <limits.h>
#else
#ifndef UCHAR_MAX
#define UCHAR_MAX 255
#endif
#ifndef INT_MAX
#define INT_MAX 2147483647
#endif
#endif
35
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000037#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000038static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000039#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000040
/*
   PyString_FromStringAndSize() and PyString_FromString() try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of
   objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000056PyObject *
57PyString_FromStringAndSize(str, size)
Guido van Rossum067998f1996-12-10 15:33:34 +000058 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059 int size;
60{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000062#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 if (size == 0 && (op = nullstring) != NULL) {
64#ifdef COUNT_ALLOCS
65 null_strings++;
66#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067 Py_INCREF(op);
68 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 if (size == 1 && str != NULL &&
71 (op = characters[*str & UCHAR_MAX]) != NULL)
72 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073#ifdef COUNT_ALLOCS
74 one_strings++;
75#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 Py_INCREF(op);
77 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000079#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000080
81 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000083 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000086 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087#ifdef CACHE_HASH
88 op->ob_shash = -1;
89#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000090#ifdef INTERN_STRINGS
91 op->ob_sinterned = NULL;
92#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 if (str != NULL)
94 memcpy(op->ob_sval, str, size);
95 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0) {
98 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 } else if (size == 1 && str != NULL) {
101 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000102 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000104#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000105 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106}
107
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108PyObject *
109PyString_FromString(str)
Guido van Rossum067998f1996-12-10 15:33:34 +0000110 const char *str;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000111{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000114 if (size > INT_MAX) {
115 PyErr_SetString(PyExc_OverflowError,
116 "string is too long for a Python string");
117 return NULL;
118 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000119#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 if (size == 0 && (op = nullstring) != NULL) {
121#ifdef COUNT_ALLOCS
122 null_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
127 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
128#ifdef COUNT_ALLOCS
129 one_strings++;
130#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000134#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000135
136 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000137 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000138 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000139 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000140 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000141 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142#ifdef CACHE_HASH
143 op->ob_shash = -1;
144#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000145#ifdef INTERN_STRINGS
146 op->ob_sinterned = NULL;
147#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000148 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000149#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 if (size == 0) {
151 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000153 } else if (size == 1) {
154 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000157#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000158 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000159}
160
Guido van Rossum234f9421993-06-17 12:35:49 +0000161static void
Guido van Rossume5372401993-03-16 12:15:04 +0000162string_dealloc(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000163 PyObject *op;
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000164{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000165 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000166}
167
Guido van Rossumd7047b31995-01-02 19:07:15 +0000168int
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000169PyString_Size(op)
170 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000171{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000172 if (!PyString_Check(op)) {
173 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000174 return -1;
175 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000176 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000177}
178
179/*const*/ char *
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000180PyString_AsString(op)
181 register PyObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000182{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000183 if (!PyString_Check(op)) {
184 PyErr_BadInternalCall();
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000185 return NULL;
186 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000187 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000188}
189
190/* Methods */
191
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000192static int
Guido van Rossume5372401993-03-16 12:15:04 +0000193string_print(op, fp, flags)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000194 PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000195 FILE *fp;
196 int flags;
197{
198 int i;
199 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000200 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000201 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000202 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000203 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000204 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000205 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000206
207 /* figure out which quote to use; single is prefered */
208 quote = '\'';
209 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
210 quote = '"';
211
212 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000213 for (i = 0; i < op->ob_size; i++) {
214 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000215 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000216 fprintf(fp, "\\%c", c);
217 else if (c < ' ' || c >= 0177)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000218 fprintf(fp, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000219 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000220 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000221 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000222 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000223 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000224}
225
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000226static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000227string_repr(op)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000228 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000229{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000230 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
231 PyObject *v;
232 if (newsize > INT_MAX) {
233 PyErr_SetString(PyExc_OverflowError,
234 "string is too large to make repr");
235 }
236 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000237 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000238 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239 }
240 else {
241 register int i;
242 register char c;
243 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000244 int quote;
245
246 /* figure out which quote to use; single is prefered */
247 quote = '\'';
248 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
249 quote = '"';
250
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000251 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000252 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000253 for (i = 0; i < op->ob_size; i++) {
254 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000255 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000256 *p++ = '\\', *p++ = c;
257 else if (c < ' ' || c >= 0177) {
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000258 sprintf(p, "\\%03o", c & 0377);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000259 while (*p != '\0')
260 p++;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000261 }
262 else
263 *p++ = c;
264 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000265 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000266 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000267 _PyString_Resize(
268 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000269 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000270 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000271}
272
273static int
Guido van Rossume5372401993-03-16 12:15:04 +0000274string_length(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000275 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276{
277 return a->ob_size;
278}
279
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000280static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000281string_concat(a, bb)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000282 register PyStringObject *a;
283 register PyObject *bb;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284{
285 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000286 register PyStringObject *op;
287 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000288 if (PyUnicode_Check(bb))
289 return PyUnicode_Concat((PyObject *)a, bb);
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000290 PyErr_Format(PyExc_TypeError,
291 "cannot add type \"%.200s\" to string",
292 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000293 return NULL;
294 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000295#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000296 /* Optimize cases with empty left or right operand */
297 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000298 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000299 return bb;
300 }
301 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000302 Py_INCREF(a);
303 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000304 }
305 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000306 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000307 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000308 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000309 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000310 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000311 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000312#ifdef CACHE_HASH
313 op->ob_shash = -1;
314#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000315#ifdef INTERN_STRINGS
316 op->ob_sinterned = NULL;
317#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000318 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
319 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
320 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000321 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322#undef b
323}
324
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000326string_repeat(a, n)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000327 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000328 register int n;
329{
330 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000331 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000332 register PyStringObject *op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000333 if (n < 0)
334 n = 0;
335 size = a->ob_size * n;
336 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000337 Py_INCREF(a);
338 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000339 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000340 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000341 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000342 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000343 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000344 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000345 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000346#ifdef CACHE_HASH
347 op->ob_shash = -1;
348#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000349#ifdef INTERN_STRINGS
350 op->ob_sinterned = NULL;
351#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000352 for (i = 0; i < size; i += a->ob_size)
353 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
354 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000355 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000356}
357
358/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
359
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000360static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000361string_slice(a, i, j)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000362 register PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 register int i, j; /* May be negative! */
364{
365 if (i < 0)
366 i = 0;
367 if (j < 0)
368 j = 0; /* Avoid signed/unsigned bug in next line */
369 if (j > a->ob_size)
370 j = a->ob_size;
371 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000372 Py_INCREF(a);
373 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000374 }
375 if (j < i)
376 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000378}
379
Guido van Rossum9284a572000-03-07 15:53:43 +0000380static int
381string_contains(a, el)
382PyObject *a, *el;
383{
384 register char *s, *end;
385 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000386 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000387 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000388 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000389 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000390 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000391 return -1;
392 }
393 c = PyString_AsString(el)[0];
394 s = PyString_AsString(a);
395 end = s + PyString_Size(a);
396 while (s < end) {
397 if (c == *s++)
398 return 1;
399 }
400 return 0;
401}
402
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000403static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +0000404string_item(a, i)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000405 PyStringObject *a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000406 register int i;
407{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000408 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000409 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000411 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412 return NULL;
413 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000414 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000416#ifdef COUNT_ALLOCS
417 if (v != NULL)
418 one_strings++;
419#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000420 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000422 if (v == NULL)
423 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000424 characters[c] = (PyStringObject *) v;
425 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000426 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000428 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000429}
430
431static int
Guido van Rossume5372401993-03-16 12:15:04 +0000432string_compare(a, b)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 PyStringObject *a, *b;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000434{
Guido van Rossum253919f1991-02-13 23:18:39 +0000435 int len_a = a->ob_size, len_b = b->ob_size;
436 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000437 int cmp;
438 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000439 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000440 if (cmp == 0)
441 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
442 if (cmp != 0)
443 return cmp;
444 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000445 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000446}
447
Guido van Rossum9bfef441993-03-29 10:43:31 +0000448static long
449string_hash(a)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000450 PyStringObject *a;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000451{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000452 register int len;
453 register unsigned char *p;
454 register long x;
455
456#ifdef CACHE_HASH
457 if (a->ob_shash != -1)
458 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000459#ifdef INTERN_STRINGS
460 if (a->ob_sinterned != NULL)
461 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000462 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000463#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000464#endif
465 len = a->ob_size;
466 p = (unsigned char *) a->ob_sval;
467 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000468 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000469 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000470 x ^= a->ob_size;
471 if (x == -1)
472 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000473#ifdef CACHE_HASH
474 a->ob_shash = x;
475#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000476 return x;
477}
478
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000479static int
480string_buffer_getreadbuf(self, index, ptr)
481 PyStringObject *self;
482 int index;
483 const void **ptr;
484{
485 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000486 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000487 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000488 return -1;
489 }
490 *ptr = (void *)self->ob_sval;
491 return self->ob_size;
492}
493
494static int
495string_buffer_getwritebuf(self, index, ptr)
496 PyStringObject *self;
497 int index;
498 const void **ptr;
499{
Guido van Rossum045e6881997-09-08 18:30:11 +0000500 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000501 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000502 return -1;
503}
504
505static int
506string_buffer_getsegcount(self, lenp)
507 PyStringObject *self;
508 int *lenp;
509{
510 if ( lenp )
511 *lenp = self->ob_size;
512 return 1;
513}
514
Guido van Rossum1db70701998-10-08 02:18:52 +0000515static int
516string_buffer_getcharbuf(self, index, ptr)
517 PyStringObject *self;
518 int index;
519 const char **ptr;
520{
521 if ( index != 0 ) {
522 PyErr_SetString(PyExc_SystemError,
523 "accessing non-existent string segment");
524 return -1;
525 }
526 *ptr = self->ob_sval;
527 return self->ob_size;
528}
529
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000530static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000531 (inquiry)string_length, /*sq_length*/
532 (binaryfunc)string_concat, /*sq_concat*/
533 (intargfunc)string_repeat, /*sq_repeat*/
534 (intargfunc)string_item, /*sq_item*/
535 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000536 0, /*sq_ass_item*/
537 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000538 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000539};
540
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000541static PyBufferProcs string_as_buffer = {
542 (getreadbufferproc)string_buffer_getreadbuf,
543 (getwritebufferproc)string_buffer_getwritebuf,
544 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000545 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000546};
547
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000548
549
/* Mode selectors, named for stripping on the left, on the right, or
   on both sides.  NOTE(review): their users lie outside this part of
   the file. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
553
554
555static PyObject *
556split_whitespace(s, len, maxsplit)
557 char *s;
558 int len;
559 int maxsplit;
560{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000561 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000562 PyObject* item;
563 PyObject *list = PyList_New(0);
564
565 if (list == NULL)
566 return NULL;
567
Guido van Rossum4c08d552000-03-10 22:55:18 +0000568 for (i = j = 0; i < len; ) {
569 while (i < len && isspace(Py_CHARMASK(s[i])))
570 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000571 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000572 while (i < len && !isspace(Py_CHARMASK(s[i])))
573 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000574 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000575 if (maxsplit-- <= 0)
576 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000577 item = PyString_FromStringAndSize(s+j, (int)(i-j));
578 if (item == NULL)
579 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000580 err = PyList_Append(list, item);
581 Py_DECREF(item);
582 if (err < 0)
583 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000584 while (i < len && isspace(Py_CHARMASK(s[i])))
585 i++;
586 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000587 }
588 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000589 if (j < len) {
590 item = PyString_FromStringAndSize(s+j, (int)(len - j));
591 if (item == NULL)
592 goto finally;
593 err = PyList_Append(list, item);
594 Py_DECREF(item);
595 if (err < 0)
596 goto finally;
597 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000598 return list;
599 finally:
600 Py_DECREF(list);
601 return NULL;
602}
603
604
605static char split__doc__[] =
606"S.split([sep [,maxsplit]]) -> list of strings\n\
607\n\
608Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000609delimiter string. If maxsplit is given, at most maxsplit\n\
610splits are done. If sep is not specified, any whitespace string\n\
611is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000612
613static PyObject *
614string_split(self, args)
615 PyStringObject *self;
616 PyObject *args;
617{
618 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000619 int maxsplit = -1;
620 const char *s = PyString_AS_STRING(self), *sub;
621 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000622
Guido van Rossum4c08d552000-03-10 22:55:18 +0000623 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000624 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000625 if (maxsplit < 0)
626 maxsplit = INT_MAX;
627 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000628 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000629 if (PyString_Check(subobj)) {
630 sub = PyString_AS_STRING(subobj);
631 n = PyString_GET_SIZE(subobj);
632 }
633 else if (PyUnicode_Check(subobj))
634 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
635 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
636 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000637 if (n == 0) {
638 PyErr_SetString(PyExc_ValueError, "empty separator");
639 return NULL;
640 }
641
642 list = PyList_New(0);
643 if (list == NULL)
644 return NULL;
645
646 i = j = 0;
647 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000648 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000649 if (maxsplit-- <= 0)
650 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000651 item = PyString_FromStringAndSize(s+j, (int)(i-j));
652 if (item == NULL)
653 goto fail;
654 err = PyList_Append(list, item);
655 Py_DECREF(item);
656 if (err < 0)
657 goto fail;
658 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000659 }
660 else
661 i++;
662 }
663 item = PyString_FromStringAndSize(s+j, (int)(len-j));
664 if (item == NULL)
665 goto fail;
666 err = PyList_Append(list, item);
667 Py_DECREF(item);
668 if (err < 0)
669 goto fail;
670
671 return list;
672
673 fail:
674 Py_DECREF(list);
675 return NULL;
676}
677
678
679static char join__doc__[] =
680"S.join(sequence) -> string\n\
681\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000682Return a string which is the concatenation of the strings in the\n\
683sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000684
685static PyObject *
686string_join(self, args)
687 PyStringObject *self;
688 PyObject *args;
689{
690 char *sep = PyString_AS_STRING(self);
691 int seplen = PyString_GET_SIZE(self);
692 PyObject *res = NULL;
693 int reslen = 0;
694 char *p;
695 int seqlen = 0;
696 int sz = 100;
697 int i, slen;
698 PyObject *seq;
699
Guido van Rossum43713e52000-02-29 13:59:29 +0000700 if (!PyArg_ParseTuple(args, "O:join", &seq))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000701 return NULL;
702
703 seqlen = PySequence_Length(seq);
704 if (seqlen < 0 && PyErr_Occurred())
705 return NULL;
706
707 if (seqlen == 1) {
708 /* Optimization if there's only one item */
709 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000710 if (item == NULL)
711 return NULL;
712 if (!PyString_Check(item) &&
713 !PyUnicode_Check(item)) {
714 PyErr_SetString(PyExc_TypeError,
715 "first argument must be sequence of strings");
716 Py_DECREF(item);
717 return NULL;
718 }
719 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000720 }
721 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
722 return NULL;
723 p = PyString_AsString(res);
724
725 /* optimize for lists. all others (tuples and arbitrary sequences)
726 * just use the abstract interface.
727 */
728 if (PyList_Check(seq)) {
729 for (i = 0; i < seqlen; i++) {
730 PyObject *item = PyList_GET_ITEM(seq, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000731 if (!PyString_Check(item)){
732 if (PyUnicode_Check(item)) {
733 Py_DECREF(res);
734 return PyUnicode_Join(
735 (PyObject *)self,
736 seq);
Barry Warsawbf325832000-03-06 14:52:18 +0000737 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000738 PyErr_Format(PyExc_TypeError,
739 "sequence item %i not a string",
740 i);
741 goto finally;
742 }
743 slen = PyString_GET_SIZE(item);
744 while (reslen + slen + seplen >= sz) {
745 if (_PyString_Resize(&res, sz*2))
746 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000747 sz *= 2;
748 p = PyString_AsString(res) + reslen;
749 }
750 if (i > 0) {
751 memcpy(p, sep, seplen);
752 p += seplen;
753 reslen += seplen;
754 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000755 memcpy(p, PyString_AS_STRING(item), slen);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000756 p += slen;
757 reslen += slen;
758 }
759 }
760 else {
761 for (i = 0; i < seqlen; i++) {
762 PyObject *item = PySequence_GetItem(seq, i);
Barry Warsawbf325832000-03-06 14:52:18 +0000763 if (!item)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000764 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000765 if (!PyString_Check(item)){
766 if (PyUnicode_Check(item)) {
767 Py_DECREF(res);
768 Py_DECREF(item);
769 return PyUnicode_Join(
770 (PyObject *)self,
771 seq);
772 }
773 Py_DECREF(item);
774 PyErr_Format(PyExc_TypeError,
775 "sequence item %i not a string",
776 i);
Barry Warsawbf325832000-03-06 14:52:18 +0000777 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000778 }
779 slen = PyString_GET_SIZE(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000780 while (reslen + slen + seplen >= sz) {
Barry Warsawbf325832000-03-06 14:52:18 +0000781 if (_PyString_Resize(&res, sz*2)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000782 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000783 goto finally;
Barry Warsawbf325832000-03-06 14:52:18 +0000784 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000785 sz *= 2;
786 p = PyString_AsString(res) + reslen;
787 }
788 if (i > 0) {
789 memcpy(p, sep, seplen);
790 p += seplen;
791 reslen += seplen;
792 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000793 memcpy(p, PyString_AS_STRING(item), slen);
794 Py_DECREF(item);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000795 p += slen;
796 reslen += slen;
797 }
798 }
799 if (_PyString_Resize(&res, reslen))
800 goto finally;
801 return res;
802
803 finally:
804 Py_DECREF(res);
805 return NULL;
806}
807
808
809
810static long
Guido van Rossum4c08d552000-03-10 22:55:18 +0000811string_find_internal(self, args, dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000812 PyStringObject *self;
813 PyObject *args;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000814 int dir;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000815{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000816 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000817 int len = PyString_GET_SIZE(self);
818 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000819 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000820
Guido van Rossumc6821402000-05-08 14:08:05 +0000821 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
822 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000823 return -2;
824 if (PyString_Check(subobj)) {
825 sub = PyString_AS_STRING(subobj);
826 n = PyString_GET_SIZE(subobj);
827 }
828 else if (PyUnicode_Check(subobj))
829 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
830 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000831 return -2;
832
833 if (last > len)
834 last = len;
835 if (last < 0)
836 last += len;
837 if (last < 0)
838 last = 0;
839 if (i < 0)
840 i += len;
841 if (i < 0)
842 i = 0;
843
Guido van Rossum4c08d552000-03-10 22:55:18 +0000844 if (dir > 0) {
845 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000846 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000847 last -= n;
848 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000849 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000850 return (long)i;
851 }
852 else {
853 int j;
854
855 if (n == 0 && i <= last)
856 return (long)last;
857 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000858 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000859 return (long)j;
860 }
861
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000862 return -1;
863}
864
865
866static char find__doc__[] =
867"S.find(sub [,start [,end]]) -> int\n\
868\n\
869Return the lowest index in S where substring sub is found,\n\
870such that sub is contained within s[start,end]. Optional\n\
871arguments start and end are interpreted as in slice notation.\n\
872\n\
873Return -1 on failure.";
874
875static PyObject *
876string_find(self, args)
877 PyStringObject *self;
878 PyObject *args;
879{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000880 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000881 if (result == -2)
882 return NULL;
883 return PyInt_FromLong(result);
884}
885
886
887static char index__doc__[] =
888"S.index(sub [,start [,end]]) -> int\n\
889\n\
890Like S.find() but raise ValueError when the substring is not found.";
891
892static PyObject *
893string_index(self, args)
894 PyStringObject *self;
895 PyObject *args;
896{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000897 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000898 if (result == -2)
899 return NULL;
900 if (result == -1) {
901 PyErr_SetString(PyExc_ValueError,
902 "substring not found in string.index");
903 return NULL;
904 }
905 return PyInt_FromLong(result);
906}
907
908
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000909static char rfind__doc__[] =
910"S.rfind(sub [,start [,end]]) -> int\n\
911\n\
912Return the highest index in S where substring sub is found,\n\
913such that sub is contained within s[start,end]. Optional\n\
914arguments start and end are interpreted as in slice notation.\n\
915\n\
916Return -1 on failure.";
917
918static PyObject *
919string_rfind(self, args)
920 PyStringObject *self;
921 PyObject *args;
922{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000923 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000924 if (result == -2)
925 return NULL;
926 return PyInt_FromLong(result);
927}
928
929
930static char rindex__doc__[] =
931"S.rindex(sub [,start [,end]]) -> int\n\
932\n\
933Like S.rfind() but raise ValueError when the substring is not found.";
934
935static PyObject *
936string_rindex(self, args)
937 PyStringObject *self;
938 PyObject *args;
939{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000940 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000941 if (result == -2)
942 return NULL;
943 if (result == -1) {
944 PyErr_SetString(PyExc_ValueError,
945 "substring not found in string.rindex");
946 return NULL;
947 }
948 return PyInt_FromLong(result);
949}
950
951
952static PyObject *
953do_strip(self, args, striptype)
954 PyStringObject *self;
955 PyObject *args;
956 int striptype;
957{
958 char *s = PyString_AS_STRING(self);
959 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000960
Guido van Rossum43713e52000-02-29 13:59:29 +0000961 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000962 return NULL;
963
964 i = 0;
965 if (striptype != RIGHTSTRIP) {
966 while (i < len && isspace(Py_CHARMASK(s[i]))) {
967 i++;
968 }
969 }
970
971 j = len;
972 if (striptype != LEFTSTRIP) {
973 do {
974 j--;
975 } while (j >= i && isspace(Py_CHARMASK(s[j])));
976 j++;
977 }
978
979 if (i == 0 && j == len) {
980 Py_INCREF(self);
981 return (PyObject*)self;
982 }
983 else
984 return PyString_FromStringAndSize(s+i, j-i);
985}
986
987
988static char strip__doc__[] =
989"S.strip() -> string\n\
990\n\
991Return a copy of the string S with leading and trailing\n\
992whitespace removed.";
993
994static PyObject *
995string_strip(self, args)
996 PyStringObject *self;
997 PyObject *args;
998{
999 return do_strip(self, args, BOTHSTRIP);
1000}
1001
1002
1003static char lstrip__doc__[] =
1004"S.lstrip() -> string\n\
1005\n\
1006Return a copy of the string S with leading whitespace removed.";
1007
1008static PyObject *
1009string_lstrip(self, args)
1010 PyStringObject *self;
1011 PyObject *args;
1012{
1013 return do_strip(self, args, LEFTSTRIP);
1014}
1015
1016
1017static char rstrip__doc__[] =
1018"S.rstrip() -> string\n\
1019\n\
1020Return a copy of the string S with trailing whitespace removed.";
1021
1022static PyObject *
1023string_rstrip(self, args)
1024 PyStringObject *self;
1025 PyObject *args;
1026{
1027 return do_strip(self, args, RIGHTSTRIP);
1028}
1029
1030
1031static char lower__doc__[] =
1032"S.lower() -> string\n\
1033\n\
1034Return a copy of the string S converted to lowercase.";
1035
1036static PyObject *
1037string_lower(self, args)
1038 PyStringObject *self;
1039 PyObject *args;
1040{
1041 char *s = PyString_AS_STRING(self), *s_new;
1042 int i, n = PyString_GET_SIZE(self);
1043 PyObject *new;
1044
Guido van Rossum43713e52000-02-29 13:59:29 +00001045 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001046 return NULL;
1047 new = PyString_FromStringAndSize(NULL, n);
1048 if (new == NULL)
1049 return NULL;
1050 s_new = PyString_AsString(new);
1051 for (i = 0; i < n; i++) {
1052 int c = Py_CHARMASK(*s++);
1053 if (isupper(c)) {
1054 *s_new = tolower(c);
1055 } else
1056 *s_new = c;
1057 s_new++;
1058 }
1059 return new;
1060}
1061
1062
1063static char upper__doc__[] =
1064"S.upper() -> string\n\
1065\n\
1066Return a copy of the string S converted to uppercase.";
1067
1068static PyObject *
1069string_upper(self, args)
1070 PyStringObject *self;
1071 PyObject *args;
1072{
1073 char *s = PyString_AS_STRING(self), *s_new;
1074 int i, n = PyString_GET_SIZE(self);
1075 PyObject *new;
1076
Guido van Rossum43713e52000-02-29 13:59:29 +00001077 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001078 return NULL;
1079 new = PyString_FromStringAndSize(NULL, n);
1080 if (new == NULL)
1081 return NULL;
1082 s_new = PyString_AsString(new);
1083 for (i = 0; i < n; i++) {
1084 int c = Py_CHARMASK(*s++);
1085 if (islower(c)) {
1086 *s_new = toupper(c);
1087 } else
1088 *s_new = c;
1089 s_new++;
1090 }
1091 return new;
1092}
1093
1094
Guido van Rossum4c08d552000-03-10 22:55:18 +00001095static char title__doc__[] =
1096"S.title() -> string\n\
1097\n\
1098Return a titlecased version of S, i.e. words start with uppercase\n\
1099characters, all remaining cased characters have lowercase.";
1100
1101static PyObject*
1102string_title(PyUnicodeObject *self, PyObject *args)
1103{
1104 char *s = PyString_AS_STRING(self), *s_new;
1105 int i, n = PyString_GET_SIZE(self);
1106 int previous_is_cased = 0;
1107 PyObject *new;
1108
1109 if (!PyArg_ParseTuple(args, ":title"))
1110 return NULL;
1111 new = PyString_FromStringAndSize(NULL, n);
1112 if (new == NULL)
1113 return NULL;
1114 s_new = PyString_AsString(new);
1115 for (i = 0; i < n; i++) {
1116 int c = Py_CHARMASK(*s++);
1117 if (islower(c)) {
1118 if (!previous_is_cased)
1119 c = toupper(c);
1120 previous_is_cased = 1;
1121 } else if (isupper(c)) {
1122 if (previous_is_cased)
1123 c = tolower(c);
1124 previous_is_cased = 1;
1125 } else
1126 previous_is_cased = 0;
1127 *s_new++ = c;
1128 }
1129 return new;
1130}
1131
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001132static char capitalize__doc__[] =
1133"S.capitalize() -> string\n\
1134\n\
1135Return a copy of the string S with only its first character\n\
1136capitalized.";
1137
1138static PyObject *
1139string_capitalize(self, args)
1140 PyStringObject *self;
1141 PyObject *args;
1142{
1143 char *s = PyString_AS_STRING(self), *s_new;
1144 int i, n = PyString_GET_SIZE(self);
1145 PyObject *new;
1146
Guido van Rossum43713e52000-02-29 13:59:29 +00001147 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001148 return NULL;
1149 new = PyString_FromStringAndSize(NULL, n);
1150 if (new == NULL)
1151 return NULL;
1152 s_new = PyString_AsString(new);
1153 if (0 < n) {
1154 int c = Py_CHARMASK(*s++);
1155 if (islower(c))
1156 *s_new = toupper(c);
1157 else
1158 *s_new = c;
1159 s_new++;
1160 }
1161 for (i = 1; i < n; i++) {
1162 int c = Py_CHARMASK(*s++);
1163 if (isupper(c))
1164 *s_new = tolower(c);
1165 else
1166 *s_new = c;
1167 s_new++;
1168 }
1169 return new;
1170}
1171
1172
1173static char count__doc__[] =
1174"S.count(sub[, start[, end]]) -> int\n\
1175\n\
1176Return the number of occurrences of substring sub in string\n\
1177S[start:end]. Optional arguments start and end are\n\
1178interpreted as in slice notation.";
1179
1180static PyObject *
1181string_count(self, args)
1182 PyStringObject *self;
1183 PyObject *args;
1184{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001185 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001186 int len = PyString_GET_SIZE(self), n;
1187 int i = 0, last = INT_MAX;
1188 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001189 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001190
Guido van Rossumc6821402000-05-08 14:08:05 +00001191 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1192 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001193 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001194
Guido van Rossum4c08d552000-03-10 22:55:18 +00001195 if (PyString_Check(subobj)) {
1196 sub = PyString_AS_STRING(subobj);
1197 n = PyString_GET_SIZE(subobj);
1198 }
1199 else if (PyUnicode_Check(subobj))
1200 return PyInt_FromLong(
1201 PyUnicode_Count((PyObject *)self, subobj, i, last));
1202 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1203 return NULL;
1204
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001205 if (last > len)
1206 last = len;
1207 if (last < 0)
1208 last += len;
1209 if (last < 0)
1210 last = 0;
1211 if (i < 0)
1212 i += len;
1213 if (i < 0)
1214 i = 0;
1215 m = last + 1 - n;
1216 if (n == 0)
1217 return PyInt_FromLong((long) (m-i));
1218
1219 r = 0;
1220 while (i < m) {
1221 if (!memcmp(s+i, sub, n)) {
1222 r++;
1223 i += n;
1224 } else {
1225 i++;
1226 }
1227 }
1228 return PyInt_FromLong((long) r);
1229}
1230
1231
1232static char swapcase__doc__[] =
1233"S.swapcase() -> string\n\
1234\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001235Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001236converted to lowercase and vice versa.";
1237
1238static PyObject *
1239string_swapcase(self, args)
1240 PyStringObject *self;
1241 PyObject *args;
1242{
1243 char *s = PyString_AS_STRING(self), *s_new;
1244 int i, n = PyString_GET_SIZE(self);
1245 PyObject *new;
1246
Guido van Rossum43713e52000-02-29 13:59:29 +00001247 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001248 return NULL;
1249 new = PyString_FromStringAndSize(NULL, n);
1250 if (new == NULL)
1251 return NULL;
1252 s_new = PyString_AsString(new);
1253 for (i = 0; i < n; i++) {
1254 int c = Py_CHARMASK(*s++);
1255 if (islower(c)) {
1256 *s_new = toupper(c);
1257 }
1258 else if (isupper(c)) {
1259 *s_new = tolower(c);
1260 }
1261 else
1262 *s_new = c;
1263 s_new++;
1264 }
1265 return new;
1266}
1267
1268
1269static char translate__doc__[] =
1270"S.translate(table [,deletechars]) -> string\n\
1271\n\
1272Return a copy of the string S, where all characters occurring\n\
1273in the optional argument deletechars are removed, and the\n\
1274remaining characters have been mapped through the given\n\
1275translation table, which must be a string of length 256.";
1276
1277static PyObject *
1278string_translate(self, args)
1279 PyStringObject *self;
1280 PyObject *args;
1281{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001282 register char *input, *output;
1283 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001284 register int i, c, changed = 0;
1285 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001287 int inlen, tablen, dellen = 0;
1288 PyObject *result;
1289 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001290 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001291
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292 if (!PyArg_ParseTuple(args, "O|O:translate",
1293 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001294 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001295
1296 if (PyString_Check(tableobj)) {
1297 table1 = PyString_AS_STRING(tableobj);
1298 tablen = PyString_GET_SIZE(tableobj);
1299 }
1300 else if (PyUnicode_Check(tableobj)) {
1301 /* Unicode .translate() does not support the deletechars
1302 parameter; instead a mapping to None will cause characters
1303 to be deleted. */
1304 if (delobj != NULL) {
1305 PyErr_SetString(PyExc_TypeError,
1306 "deletions are implemented differently for unicode");
1307 return NULL;
1308 }
1309 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1310 }
1311 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001312 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001313
1314 if (delobj != NULL) {
1315 if (PyString_Check(delobj)) {
1316 del_table = PyString_AS_STRING(delobj);
1317 dellen = PyString_GET_SIZE(delobj);
1318 }
1319 else if (PyUnicode_Check(delobj)) {
1320 PyErr_SetString(PyExc_TypeError,
1321 "deletions are implemented differently for unicode");
1322 return NULL;
1323 }
1324 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1325 return NULL;
1326
1327 if (tablen != 256) {
1328 PyErr_SetString(PyExc_ValueError,
1329 "translation table must be 256 characters long");
1330 return NULL;
1331 }
1332 }
1333 else {
1334 del_table = NULL;
1335 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336 }
1337
1338 table = table1;
1339 inlen = PyString_Size(input_obj);
1340 result = PyString_FromStringAndSize((char *)NULL, inlen);
1341 if (result == NULL)
1342 return NULL;
1343 output_start = output = PyString_AsString(result);
1344 input = PyString_AsString(input_obj);
1345
1346 if (dellen == 0) {
1347 /* If no deletions are required, use faster code */
1348 for (i = inlen; --i >= 0; ) {
1349 c = Py_CHARMASK(*input++);
1350 if (Py_CHARMASK((*output++ = table[c])) != c)
1351 changed = 1;
1352 }
1353 if (changed)
1354 return result;
1355 Py_DECREF(result);
1356 Py_INCREF(input_obj);
1357 return input_obj;
1358 }
1359
1360 for (i = 0; i < 256; i++)
1361 trans_table[i] = Py_CHARMASK(table[i]);
1362
1363 for (i = 0; i < dellen; i++)
1364 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1365
1366 for (i = inlen; --i >= 0; ) {
1367 c = Py_CHARMASK(*input++);
1368 if (trans_table[c] != -1)
1369 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1370 continue;
1371 changed = 1;
1372 }
1373 if (!changed) {
1374 Py_DECREF(result);
1375 Py_INCREF(input_obj);
1376 return input_obj;
1377 }
1378 /* Fix the size of the resulting string */
1379 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1380 return NULL;
1381 return result;
1382}
1383
1384
1385/* What follows is used for implementing replace(). Perry Stoll. */
1386
/*
  mymemfind

  A strstr() work-alike for arbitrary (not NUL-terminated) memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its offset from MEM.  Returns -1 when PAT
  does not occur, which is always the case when PAT_LEN exceeds LEN.
*/
static int
mymemfind(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    /* highest offset at which PAT still fits inside MEM */
    int last_start = len - pat_len;
    register int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1416
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning left
   to right and resuming right after each match:
       mem=1111  and pat=11 gives 2,
       mem=11111 and pat=11 gives 2 as well.
 */
static int
mymemcnt(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    int nfound = 0;
    register int offset;

    while (len >= 0 &&
           (offset = mymemfind(mem, len, pat, pat_len)) != -1) {
        /* skip everything up to and including this match */
        int consumed = offset + pat_len;

        mem += consumed;
        len -= consumed;
        nfound++;
    }
    return nfound;
}
1444
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means replace all of them.

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there is nothing to replace, then the original STR
   pointer is returned.  Otherwise, a new buffer is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
    char *str;
    int len;     /* input string */
    char *pat;
    int pat_len; /* pattern string to find */
    char *sub;
    int sub_len; /* substitution string */
    int count;   /* number of replacements */
    int *out_len;

{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;
    new_len = len + nfound*(sub_len - pat_len);

    /* The result is empty when every byte of STR is replaced by an
       empty SUB.  A zero-byte allocation may return NULL, which the
       caller would report as out of memory, so always ask for at
       least one byte. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found,  break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len; /* move str past pattern */
        len -= offset + pat_len; /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset; /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len; /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return str;
}
1527
1528
1529static char replace__doc__[] =
1530"S.replace (old, new[, maxsplit]) -> string\n\
1531\n\
1532Return a copy of string S with all occurrences of substring\n\
1533old replaced by new. If the optional argument maxsplit is\n\
1534given, only the first maxsplit occurrences are replaced.";
1535
1536static PyObject *
1537string_replace(self, args)
1538 PyStringObject *self;
1539 PyObject *args;
1540{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001541 const char *str = PyString_AS_STRING(self), *sub, *repl;
1542 char *new_s;
1543 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1544 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001546 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547
Guido van Rossum4c08d552000-03-10 22:55:18 +00001548 if (!PyArg_ParseTuple(args, "OO|i:replace",
1549 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001550 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001551
1552 if (PyString_Check(subobj)) {
1553 sub = PyString_AS_STRING(subobj);
1554 sub_len = PyString_GET_SIZE(subobj);
1555 }
1556 else if (PyUnicode_Check(subobj))
1557 return PyUnicode_Replace((PyObject *)self,
1558 subobj, replobj, count);
1559 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1560 return NULL;
1561
1562 if (PyString_Check(replobj)) {
1563 repl = PyString_AS_STRING(replobj);
1564 repl_len = PyString_GET_SIZE(replobj);
1565 }
1566 else if (PyUnicode_Check(replobj))
1567 return PyUnicode_Replace((PyObject *)self,
1568 subobj, replobj, count);
1569 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1570 return NULL;
1571
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001572 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001573 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001574 return NULL;
1575 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001576 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001577 if (new_s == NULL) {
1578 PyErr_NoMemory();
1579 return NULL;
1580 }
1581 if (out_len == -1) {
1582 /* we're returning another reference to self */
1583 new = (PyObject*)self;
1584 Py_INCREF(new);
1585 }
1586 else {
1587 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001588 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589 }
1590 return new;
1591}
1592
1593
1594static char startswith__doc__[] =
1595"S.startswith(prefix[, start[, end]]) -> int\n\
1596\n\
1597Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1598optional start, test S beginning at that position. With optional end, stop\n\
1599comparing S at that position.";
1600
1601static PyObject *
1602string_startswith(self, args)
1603 PyStringObject *self;
1604 PyObject *args;
1605{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001608 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609 int plen;
1610 int start = 0;
1611 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001612 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613
Guido van Rossumc6821402000-05-08 14:08:05 +00001614 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1615 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 return NULL;
1617 if (PyString_Check(subobj)) {
1618 prefix = PyString_AS_STRING(subobj);
1619 plen = PyString_GET_SIZE(subobj);
1620 }
1621 else if (PyUnicode_Check(subobj))
1622 return PyInt_FromLong(
1623 PyUnicode_Tailmatch((PyObject *)self,
1624 subobj, start, end, -1));
1625 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 return NULL;
1627
1628 /* adopt Java semantics for index out of range. it is legal for
1629 * offset to be == plen, but this only returns true if prefix is
1630 * the empty string.
1631 */
1632 if (start < 0 || start+plen > len)
1633 return PyInt_FromLong(0);
1634
1635 if (!memcmp(str+start, prefix, plen)) {
1636 /* did the match end after the specified end? */
1637 if (end < 0)
1638 return PyInt_FromLong(1);
1639 else if (end - start < plen)
1640 return PyInt_FromLong(0);
1641 else
1642 return PyInt_FromLong(1);
1643 }
1644 else return PyInt_FromLong(0);
1645}
1646
1647
1648static char endswith__doc__[] =
1649"S.endswith(suffix[, start[, end]]) -> int\n\
1650\n\
1651Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1652optional start, test S beginning at that position. With optional end, stop\n\
1653comparing S at that position.";
1654
1655static PyObject *
1656string_endswith(self, args)
1657 PyStringObject *self;
1658 PyObject *args;
1659{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001660 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001662 const char* suffix;
1663 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001664 int start = 0;
1665 int end = -1;
1666 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001667 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668
Guido van Rossumc6821402000-05-08 14:08:05 +00001669 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1670 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671 return NULL;
1672 if (PyString_Check(subobj)) {
1673 suffix = PyString_AS_STRING(subobj);
1674 slen = PyString_GET_SIZE(subobj);
1675 }
1676 else if (PyUnicode_Check(subobj))
1677 return PyInt_FromLong(
1678 PyUnicode_Tailmatch((PyObject *)self,
1679 subobj, start, end, +1));
1680 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001681 return NULL;
1682
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684 return PyInt_FromLong(0);
1685
1686 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001687 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688
Guido van Rossum4c08d552000-03-10 22:55:18 +00001689 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690 return PyInt_FromLong(1);
1691 else return PyInt_FromLong(0);
1692}
1693
1694
Guido van Rossum4c08d552000-03-10 22:55:18 +00001695static char expandtabs__doc__[] =
1696"S.expandtabs([tabsize]) -> string\n\
1697\n\
1698Return a copy of S where all tab characters are expanded using spaces.\n\
1699If tabsize is not given, a tab size of 8 characters is assumed.";
1700
1701static PyObject*
1702string_expandtabs(PyStringObject *self, PyObject *args)
1703{
1704 const char *e, *p;
1705 char *q;
1706 int i, j;
1707 PyObject *u;
1708 int tabsize = 8;
1709
1710 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1711 return NULL;
1712
1713 /* First pass: determine size of ouput string */
1714 i = j = 0;
1715 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1716 for (p = PyString_AS_STRING(self); p < e; p++)
1717 if (*p == '\t') {
1718 if (tabsize > 0)
1719 j += tabsize - (j % tabsize);
1720 }
1721 else {
1722 j++;
1723 if (*p == '\n' || *p == '\r') {
1724 i += j;
1725 j = 0;
1726 }
1727 }
1728
1729 /* Second pass: create output string and fill it */
1730 u = PyString_FromStringAndSize(NULL, i + j);
1731 if (!u)
1732 return NULL;
1733
1734 j = 0;
1735 q = PyString_AS_STRING(u);
1736
1737 for (p = PyString_AS_STRING(self); p < e; p++)
1738 if (*p == '\t') {
1739 if (tabsize > 0) {
1740 i = tabsize - (j % tabsize);
1741 j += i;
1742 while (i--)
1743 *q++ = ' ';
1744 }
1745 }
1746 else {
1747 j++;
1748 *q++ = *p;
1749 if (*p == '\n' || *p == '\r')
1750 j = 0;
1751 }
1752
1753 return u;
1754}
1755
1756static
1757PyObject *pad(PyStringObject *self,
1758 int left,
1759 int right,
1760 char fill)
1761{
1762 PyObject *u;
1763
1764 if (left < 0)
1765 left = 0;
1766 if (right < 0)
1767 right = 0;
1768
1769 if (left == 0 && right == 0) {
1770 Py_INCREF(self);
1771 return (PyObject *)self;
1772 }
1773
1774 u = PyString_FromStringAndSize(NULL,
1775 left + PyString_GET_SIZE(self) + right);
1776 if (u) {
1777 if (left)
1778 memset(PyString_AS_STRING(u), fill, left);
1779 memcpy(PyString_AS_STRING(u) + left,
1780 PyString_AS_STRING(self),
1781 PyString_GET_SIZE(self));
1782 if (right)
1783 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1784 fill, right);
1785 }
1786
1787 return u;
1788}
1789
1790static char ljust__doc__[] =
1791"S.ljust(width) -> string\n\
1792\n\
1793Return S left justified in a string of length width. Padding is\n\
1794done using spaces.";
1795
1796static PyObject *
1797string_ljust(PyStringObject *self, PyObject *args)
1798{
1799 int width;
1800 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1801 return NULL;
1802
1803 if (PyString_GET_SIZE(self) >= width) {
1804 Py_INCREF(self);
1805 return (PyObject*) self;
1806 }
1807
1808 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1809}
1810
1811
1812static char rjust__doc__[] =
1813"S.rjust(width) -> string\n\
1814\n\
1815Return S right justified in a string of length width. Padding is\n\
1816done using spaces.";
1817
1818static PyObject *
1819string_rjust(PyStringObject *self, PyObject *args)
1820{
1821 int width;
1822 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1823 return NULL;
1824
1825 if (PyString_GET_SIZE(self) >= width) {
1826 Py_INCREF(self);
1827 return (PyObject*) self;
1828 }
1829
1830 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1831}
1832
1833
1834static char center__doc__[] =
1835"S.center(width) -> string\n\
1836\n\
1837Return S centered in a string of length width. Padding is done\n\
1838using spaces.";
1839
1840static PyObject *
1841string_center(PyStringObject *self, PyObject *args)
1842{
1843 int marg, left;
1844 int width;
1845
1846 if (!PyArg_ParseTuple(args, "i:center", &width))
1847 return NULL;
1848
1849 if (PyString_GET_SIZE(self) >= width) {
1850 Py_INCREF(self);
1851 return (PyObject*) self;
1852 }
1853
1854 marg = width - PyString_GET_SIZE(self);
1855 left = marg / 2 + (marg & width & 1);
1856
1857 return pad(self, left, marg - left, ' ');
1858}
1859
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* Implements S.zfill(width) (currently compiled out).
 *
 * Left-pads with '0' up to `width` characters; if the original string
 * starts with a '+' or '-' sign, the sign is moved in front of the
 * zeros.  A string that is already long enough is returned unchanged.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int nzeros;
    PyObject *result;
    char *buf;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject *)self;
    }

    nzeros = width - PyString_GET_SIZE(self);

    result = pad(self, nzeros, 0, '0');
    if (result == NULL)
        return NULL;

    /* buf[nzeros] is the first character of the original string */
    buf = PyString_AS_STRING(result);
    switch (buf[nzeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        buf[0] = buf[nzeros];
        buf[nzeros] = '0';
        break;
    default:
        break;
    }

    return result;
}
#endif
1899
1900static char isspace__doc__[] =
1901"S.isspace() -> int\n\
1902\n\
1903Return 1 if there are only whitespace characters in S,\n\
19040 otherwise.";
1905
1906static PyObject*
1907string_isspace(PyStringObject *self, PyObject *args)
1908{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001909 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1910 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001911
1912 if (!PyArg_NoArgs(args))
1913 return NULL;
1914
1915 /* Shortcut for single character strings */
1916 if (PyString_GET_SIZE(self) == 1 &&
1917 isspace(*p))
1918 return PyInt_FromLong(1);
1919
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001920 /* Special case for empty strings */
1921 if (PyString_GET_SIZE(self) == 0)
1922 return PyInt_FromLong(0);
1923
Guido van Rossum4c08d552000-03-10 22:55:18 +00001924 e = p + PyString_GET_SIZE(self);
1925 for (; p < e; p++) {
1926 if (!isspace(*p))
1927 return PyInt_FromLong(0);
1928 }
1929 return PyInt_FromLong(1);
1930}
1931
1932
1933static char isdigit__doc__[] =
1934"S.isdigit() -> int\n\
1935\n\
1936Return 1 if there are only digit characters in S,\n\
19370 otherwise.";
1938
1939static PyObject*
1940string_isdigit(PyStringObject *self, PyObject *args)
1941{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001942 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1943 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001944
1945 if (!PyArg_NoArgs(args))
1946 return NULL;
1947
1948 /* Shortcut for single character strings */
1949 if (PyString_GET_SIZE(self) == 1 &&
1950 isdigit(*p))
1951 return PyInt_FromLong(1);
1952
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001953 /* Special case for empty strings */
1954 if (PyString_GET_SIZE(self) == 0)
1955 return PyInt_FromLong(0);
1956
Guido van Rossum4c08d552000-03-10 22:55:18 +00001957 e = p + PyString_GET_SIZE(self);
1958 for (; p < e; p++) {
1959 if (!isdigit(*p))
1960 return PyInt_FromLong(0);
1961 }
1962 return PyInt_FromLong(1);
1963}
1964
1965
1966static char islower__doc__[] =
1967"S.islower() -> int\n\
1968\n\
1969Return 1 if all cased characters in S are lowercase and there is\n\
1970at least one cased character in S, 0 otherwise.";
1971
1972static PyObject*
1973string_islower(PyStringObject *self, PyObject *args)
1974{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00001975 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
1976 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001977 int cased;
1978
1979 if (!PyArg_NoArgs(args))
1980 return NULL;
1981
1982 /* Shortcut for single character strings */
1983 if (PyString_GET_SIZE(self) == 1)
1984 return PyInt_FromLong(islower(*p) != 0);
1985
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00001986 /* Special case for empty strings */
1987 if (PyString_GET_SIZE(self) == 0)
1988 return PyInt_FromLong(0);
1989
Guido van Rossum4c08d552000-03-10 22:55:18 +00001990 e = p + PyString_GET_SIZE(self);
1991 cased = 0;
1992 for (; p < e; p++) {
1993 if (isupper(*p))
1994 return PyInt_FromLong(0);
1995 else if (!cased && islower(*p))
1996 cased = 1;
1997 }
1998 return PyInt_FromLong(cased);
1999}
2000
2001
2002static char isupper__doc__[] =
2003"S.isupper() -> int\n\
2004\n\
2005Return 1 if all cased characters in S are uppercase and there is\n\
2006at least one cased character in S, 0 otherwise.";
2007
2008static PyObject*
2009string_isupper(PyStringObject *self, PyObject *args)
2010{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002011 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2012 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002013 int cased;
2014
2015 if (!PyArg_NoArgs(args))
2016 return NULL;
2017
2018 /* Shortcut for single character strings */
2019 if (PyString_GET_SIZE(self) == 1)
2020 return PyInt_FromLong(isupper(*p) != 0);
2021
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002022 /* Special case for empty strings */
2023 if (PyString_GET_SIZE(self) == 0)
2024 return PyInt_FromLong(0);
2025
Guido van Rossum4c08d552000-03-10 22:55:18 +00002026 e = p + PyString_GET_SIZE(self);
2027 cased = 0;
2028 for (; p < e; p++) {
2029 if (islower(*p))
2030 return PyInt_FromLong(0);
2031 else if (!cased && isupper(*p))
2032 cased = 1;
2033 }
2034 return PyInt_FromLong(cased);
2035}
2036
2037
2038static char istitle__doc__[] =
2039"S.istitle() -> int\n\
2040\n\
2041Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2042may only follow uncased characters and lowercase characters only cased\n\
2043ones. Return 0 otherwise.";
2044
2045static PyObject*
2046string_istitle(PyStringObject *self, PyObject *args)
2047{
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002048 register const unsigned char *p = (unsigned char *) PyString_AS_STRING(self);
2049 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002050 int cased, previous_is_cased;
2051
2052 if (!PyArg_NoArgs(args))
2053 return NULL;
2054
2055 /* Shortcut for single character strings */
2056 if (PyString_GET_SIZE(self) == 1)
2057 return PyInt_FromLong(isupper(*p) != 0);
2058
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002059 /* Special case for empty strings */
2060 if (PyString_GET_SIZE(self) == 0)
2061 return PyInt_FromLong(0);
2062
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 e = p + PyString_GET_SIZE(self);
2064 cased = 0;
2065 previous_is_cased = 0;
2066 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002067 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002068
2069 if (isupper(ch)) {
2070 if (previous_is_cased)
2071 return PyInt_FromLong(0);
2072 previous_is_cased = 1;
2073 cased = 1;
2074 }
2075 else if (islower(ch)) {
2076 if (!previous_is_cased)
2077 return PyInt_FromLong(0);
2078 previous_is_cased = 1;
2079 cased = 1;
2080 }
2081 else
2082 previous_is_cased = 0;
2083 }
2084 return PyInt_FromLong(cased);
2085}
2086
2087
2088static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002089"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002090\n\
2091Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002092Line breaks are not included in the resulting list unless keepends\n\
2093is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002094
2095#define SPLIT_APPEND(data, left, right) \
2096 str = PyString_FromStringAndSize(data + left, right - left); \
2097 if (!str) \
2098 goto onError; \
2099 if (PyList_Append(list, str)) { \
2100 Py_DECREF(str); \
2101 goto onError; \
2102 } \
2103 else \
2104 Py_DECREF(str);
2105
2106static PyObject*
2107string_splitlines(PyStringObject *self, PyObject *args)
2108{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 register int i;
2110 register int j;
2111 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002112 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113 PyObject *list;
2114 PyObject *str;
2115 char *data;
2116
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002117 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002118 return NULL;
2119
2120 data = PyString_AS_STRING(self);
2121 len = PyString_GET_SIZE(self);
2122
Guido van Rossum4c08d552000-03-10 22:55:18 +00002123 list = PyList_New(0);
2124 if (!list)
2125 goto onError;
2126
2127 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002128 int eol;
2129
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130 /* Find a line and append it */
2131 while (i < len && data[i] != '\n' && data[i] != '\r')
2132 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002133
2134 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002135 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002136 if (i < len) {
2137 if (data[i] == '\r' && i + 1 < len &&
2138 data[i+1] == '\n')
2139 i += 2;
2140 else
2141 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002142 if (keepends)
2143 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002144 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002145 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002146 j = i;
2147 }
2148 if (j < len) {
2149 SPLIT_APPEND(data, j, len);
2150 }
2151
2152 return list;
2153
2154 onError:
2155 Py_DECREF(list);
2156 return NULL;
2157}
2158
2159#undef SPLIT_APPEND
2160
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161
2162static PyMethodDef
2163string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002164 /* Counterparts of the obsolete stropmodule functions; except
2165 string.maketrans(). */
2166 {"join", (PyCFunction)string_join, 1, join__doc__},
2167 {"split", (PyCFunction)string_split, 1, split__doc__},
2168 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2169 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2170 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2171 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2172 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2173 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2174 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2176 {"count", (PyCFunction)string_count, 1, count__doc__},
2177 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2178 {"find", (PyCFunction)string_find, 1, find__doc__},
2179 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2182 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2183 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2184 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2186 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2187 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2189 {"title", (PyCFunction)string_title, 1, title__doc__},
2190 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2191 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2192 {"center", (PyCFunction)string_center, 1, center__doc__},
2193 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2194 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2195#if 0
2196 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2197#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198 {NULL, NULL} /* sentinel */
2199};
2200
2201static PyObject *
2202string_getattr(s, name)
2203 PyStringObject *s;
2204 char *name;
2205{
2206 return Py_FindMethod(string_methods, (PyObject*)s, name);
2207}
2208
2209
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002210PyTypeObject PyString_Type = {
2211 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002212 0,
2213 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002214 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002215 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002216 (destructor)string_dealloc, /*tp_dealloc*/
2217 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002219 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002220 (cmpfunc)string_compare, /*tp_compare*/
2221 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002222 0, /*tp_as_number*/
2223 &string_as_sequence, /*tp_as_sequence*/
2224 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002225 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002226 0, /*tp_call*/
2227 0, /*tp_str*/
2228 0, /*tp_getattro*/
2229 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002230 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002231 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002232 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002233};
2234
2235void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002236PyString_Concat(pv, w)
2237 register PyObject **pv;
2238 register PyObject *w;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002239{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002240 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002241 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002242 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002243 if (w == NULL || !PyString_Check(*pv)) {
2244 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002245 *pv = NULL;
2246 return;
2247 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002248 v = string_concat((PyStringObject *) *pv, w);
2249 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002250 *pv = v;
2251}
2252
Guido van Rossum013142a1994-08-30 08:19:36 +00002253void
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002254PyString_ConcatAndDel(pv, w)
2255 register PyObject **pv;
2256 register PyObject *w;
Guido van Rossum013142a1994-08-30 08:19:36 +00002257{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002258 PyString_Concat(pv, w);
2259 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002260}
2261
2262
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002263/* The following function breaks the notion that strings are immutable:
2264 it changes the size of a string. We get away with this only if there
2265 is only one module referencing the object. You can also think of it
2266 as creating a new string object and destroying the old one, only
2267 more efficiently. In any case, don't use this if the string may
2268 already be known to some other part of the code... */
2269
2270int
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002271_PyString_Resize(pv, newsize)
2272 PyObject **pv;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002273 int newsize;
2274{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002275 register PyObject *v;
2276 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002277 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002278 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002279 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002280 Py_DECREF(v);
2281 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002282 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002283 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002284 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002285#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002286 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002287#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002288 _Py_ForgetReference(v);
2289 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002290 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002291 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002292 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002293 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002294 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002295 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002296 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002297 _Py_NewReference(*pv);
2298 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002299 sv->ob_size = newsize;
2300 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002301 return 0;
2302}
Guido van Rossume5372401993-03-16 12:15:04 +00002303
2304/* Helpers for formatstring */
2305
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002306static PyObject *
Guido van Rossume5372401993-03-16 12:15:04 +00002307getnextarg(args, arglen, p_argidx)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002308 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002309 int arglen;
2310 int *p_argidx;
2311{
2312 int argidx = *p_argidx;
2313 if (argidx < arglen) {
2314 (*p_argidx)++;
2315 if (arglen < 0)
2316 return args;
2317 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002318 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002319 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002320 PyErr_SetString(PyExc_TypeError,
2321 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002322 return NULL;
2323}
2324
/* Bit flags collected from a format specifier's flag characters. */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
2330
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002331static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002332formatfloat(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002333 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002334 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002335 int flags;
2336 int prec;
2337 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002338 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002339{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002340 /* fmt = '%#.' + `prec` + `type`
2341 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002342 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002343 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002344 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002345 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002346 if (prec < 0)
2347 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002348 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2349 type = 'g';
2350 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002351 /* worst case length calc to ensure no buffer overrun:
2352 fmt = %#.<prec>g
2353 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2354 for any double rep.)
2355 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2356 If prec=0 the effective precision is 1 (the leading digit is
2357 always given), therefore increase by one to 10+prec. */
2358 if (buflen <= (size_t)10 + (size_t)prec) {
2359 PyErr_SetString(PyExc_OverflowError,
2360 "formatted float is too long (precision too long?)");
2361 return -1;
2362 }
Guido van Rossume5372401993-03-16 12:15:04 +00002363 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002364 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002365}
2366
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002367static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002368formatint(buf, buflen, flags, prec, type, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002369 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002370 size_t buflen;
Guido van Rossume5372401993-03-16 12:15:04 +00002371 int flags;
2372 int prec;
2373 int type;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002374 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002375{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002376 /* fmt = '%#.' + `prec` + 'l' + `type`
2377 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002378 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002379 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002380 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002381 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002382 if (prec < 0)
2383 prec = 1;
2384 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002385 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2386 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2387 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2388 PyErr_SetString(PyExc_OverflowError,
2389 "formatted integer is too long (precision too long?)");
2390 return -1;
2391 }
Guido van Rossume5372401993-03-16 12:15:04 +00002392 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002393 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002394}
2395
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002396static int
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002397formatchar(buf, buflen, v)
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002398 char *buf;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002399 size_t buflen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002400 PyObject *v;
Guido van Rossume5372401993-03-16 12:15:04 +00002401{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002402 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002403 if (PyString_Check(v)) {
2404 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002405 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002406 }
2407 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002408 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002409 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002410 }
2411 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002412 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002413}
2414
Guido van Rossum013142a1994-08-30 08:19:36 +00002415
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX This is a magic number.  Each
   formatting routine does bounds checking to ensure no overflow, but a
   better solution may be to malloc a buffer of appropriate size for
   each format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002425
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002426PyObject *
2427PyString_Format(format, args)
2428 PyObject *format;
2429 PyObject *args;
Guido van Rossume5372401993-03-16 12:15:04 +00002430{
2431 char *fmt, *res;
2432 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002433 int args_owned = 0;
Guido van Rossum90daa872000-04-10 13:47:21 +00002434 PyObject *result, *orig_args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002435 PyObject *dict = NULL;
2436 if (format == NULL || !PyString_Check(format) || args == NULL) {
2437 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002438 return NULL;
2439 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002440 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002441 fmt = PyString_AsString(format);
2442 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002443 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002444 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002445 if (result == NULL)
2446 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002447 res = PyString_AsString(result);
2448 if (PyTuple_Check(args)) {
2449 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002450 argidx = 0;
2451 }
2452 else {
2453 arglen = -1;
2454 argidx = -2;
2455 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002456 if (args->ob_type->tp_as_mapping)
2457 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002458 while (--fmtcnt >= 0) {
2459 if (*fmt != '%') {
2460 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002461 rescnt = fmtcnt + 100;
2462 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002463 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002464 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002465 res = PyString_AsString(result)
2466 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002467 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002468 }
2469 *res++ = *fmt++;
2470 }
2471 else {
2472 /* Got a format specifier */
2473 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002474 int width = -1;
2475 int prec = -1;
2476 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002477 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002478 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002479 PyObject *v = NULL;
2480 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002481 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002482 int sign;
2483 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002484 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002485 char *fmt_start = fmt;
2486
Guido van Rossumda9c2711996-12-05 21:58:58 +00002487 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002488 if (*fmt == '(') {
2489 char *keystart;
2490 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002491 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002492 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002493
2494 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002495 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002496 "format requires a mapping");
2497 goto error;
2498 }
2499 ++fmt;
2500 --fmtcnt;
2501 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002502 /* Skip over balanced parentheses */
2503 while (pcount > 0 && --fmtcnt >= 0) {
2504 if (*fmt == ')')
2505 --pcount;
2506 else if (*fmt == '(')
2507 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002508 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002509 }
2510 keylen = fmt - keystart - 1;
2511 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002512 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002513 "incomplete format key");
2514 goto error;
2515 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002516 key = PyString_FromStringAndSize(keystart,
2517 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002518 if (key == NULL)
2519 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002520 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002521 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002522 args_owned = 0;
2523 }
2524 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002525 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002526 if (args == NULL) {
2527 goto error;
2528 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002529 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002530 arglen = -1;
2531 argidx = -2;
2532 }
Guido van Rossume5372401993-03-16 12:15:04 +00002533 while (--fmtcnt >= 0) {
2534 switch (c = *fmt++) {
2535 case '-': flags |= F_LJUST; continue;
2536 case '+': flags |= F_SIGN; continue;
2537 case ' ': flags |= F_BLANK; continue;
2538 case '#': flags |= F_ALT; continue;
2539 case '0': flags |= F_ZERO; continue;
2540 }
2541 break;
2542 }
2543 if (c == '*') {
2544 v = getnextarg(args, arglen, &argidx);
2545 if (v == NULL)
2546 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002547 if (!PyInt_Check(v)) {
2548 PyErr_SetString(PyExc_TypeError,
2549 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002550 goto error;
2551 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002552 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002553 if (width < 0) {
2554 flags |= F_LJUST;
2555 width = -width;
2556 }
Guido van Rossume5372401993-03-16 12:15:04 +00002557 if (--fmtcnt >= 0)
2558 c = *fmt++;
2559 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002560 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002561 width = c - '0';
2562 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002563 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002564 if (!isdigit(c))
2565 break;
2566 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002567 PyErr_SetString(
2568 PyExc_ValueError,
2569 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002570 goto error;
2571 }
2572 width = width*10 + (c - '0');
2573 }
2574 }
2575 if (c == '.') {
2576 prec = 0;
2577 if (--fmtcnt >= 0)
2578 c = *fmt++;
2579 if (c == '*') {
2580 v = getnextarg(args, arglen, &argidx);
2581 if (v == NULL)
2582 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002583 if (!PyInt_Check(v)) {
2584 PyErr_SetString(
2585 PyExc_TypeError,
2586 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002587 goto error;
2588 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002589 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002590 if (prec < 0)
2591 prec = 0;
2592 if (--fmtcnt >= 0)
2593 c = *fmt++;
2594 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002595 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002596 prec = c - '0';
2597 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002598 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002599 if (!isdigit(c))
2600 break;
2601 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002602 PyErr_SetString(
2603 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002604 "prec too big");
2605 goto error;
2606 }
2607 prec = prec*10 + (c - '0');
2608 }
2609 }
2610 } /* prec */
2611 if (fmtcnt >= 0) {
2612 if (c == 'h' || c == 'l' || c == 'L') {
2613 size = c;
2614 if (--fmtcnt >= 0)
2615 c = *fmt++;
2616 }
2617 }
2618 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002619 PyErr_SetString(PyExc_ValueError,
2620 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002621 goto error;
2622 }
2623 if (c != '%') {
2624 v = getnextarg(args, arglen, &argidx);
2625 if (v == NULL)
2626 goto error;
2627 }
2628 sign = 0;
2629 fill = ' ';
2630 switch (c) {
2631 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002632 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002633 len = 1;
2634 break;
2635 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002636 case 'r':
2637 if (PyUnicode_Check(v)) {
2638 fmt = fmt_start;
2639 goto unicode;
2640 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002641 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002642 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002643 else
2644 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002645 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002646 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002647 if (!PyString_Check(temp)) {
2648 PyErr_SetString(PyExc_TypeError,
2649 "%s argument has non-string str()");
2650 goto error;
2651 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002652 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002653 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002654 if (prec >= 0 && len > prec)
2655 len = prec;
2656 break;
2657 case 'i':
2658 case 'd':
2659 case 'u':
2660 case 'o':
2661 case 'x':
2662 case 'X':
2663 if (c == 'i')
2664 c = 'd';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002665 pbuf = formatbuf;
2666 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002667 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002668 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002669 sign = (c == 'd');
Guido van Rossum4acdc231997-01-29 06:00:24 +00002670 if (flags&F_ZERO) {
Guido van Rossume5372401993-03-16 12:15:04 +00002671 fill = '0';
Guido van Rossum4acdc231997-01-29 06:00:24 +00002672 if ((flags&F_ALT) &&
2673 (c == 'x' || c == 'X') &&
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002674 pbuf[0] == '0' && pbuf[1] == c) {
2675 *res++ = *pbuf++;
2676 *res++ = *pbuf++;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002677 rescnt -= 2;
2678 len -= 2;
2679 width -= 2;
2680 if (width < 0)
2681 width = 0;
2682 }
2683 }
Guido van Rossume5372401993-03-16 12:15:04 +00002684 break;
2685 case 'e':
2686 case 'E':
2687 case 'f':
2688 case 'g':
2689 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002690 pbuf = formatbuf;
2691 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002692 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002693 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002694 sign = 1;
2695 if (flags&F_ZERO)
2696 fill = '0';
2697 break;
2698 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002699 pbuf = formatbuf;
2700 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002701 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002702 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00002703 break;
2704 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00002705 PyErr_Format(PyExc_ValueError,
2706 "unsupported format character '%c' (0x%x)",
2707 c, c);
Guido van Rossume5372401993-03-16 12:15:04 +00002708 goto error;
2709 }
2710 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002711 if (*pbuf == '-' || *pbuf == '+') {
2712 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00002713 len--;
2714 }
2715 else if (flags & F_SIGN)
2716 sign = '+';
2717 else if (flags & F_BLANK)
2718 sign = ' ';
2719 else
2720 sign = '\0';
2721 }
2722 if (width < len)
2723 width = len;
2724 if (rescnt < width + (sign != '\0')) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002725 reslen -= rescnt;
2726 rescnt = width + fmtcnt + 100;
2727 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002728 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002729 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002730 res = PyString_AsString(result)
2731 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002732 }
2733 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00002734 if (fill != ' ')
2735 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00002736 rescnt--;
2737 if (width > len)
2738 width--;
2739 }
2740 if (width > len && !(flags&F_LJUST)) {
2741 do {
2742 --rescnt;
2743 *res++ = fill;
2744 } while (--width > len);
2745 }
Guido van Rossum71e57d01993-11-11 15:03:51 +00002746 if (sign && fill == ' ')
Guido van Rossum6938a291993-11-11 14:51:57 +00002747 *res++ = sign;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002748 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00002749 res += len;
2750 rescnt -= len;
2751 while (--width >= len) {
2752 --rescnt;
2753 *res++ = ' ';
2754 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002755 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002756 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002757 "not all arguments converted");
2758 goto error;
2759 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002760 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002761 } /* '%' */
2762 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00002763 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002764 PyErr_SetString(PyExc_TypeError,
2765 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00002766 goto error;
2767 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002768 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002769 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002770 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002771 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00002772 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00002773
2774 unicode:
2775 if (args_owned) {
2776 Py_DECREF(args);
2777 args_owned = 0;
2778 }
2779 /* Fiddle args right (remove the first argidx-1 arguments) */
2780 --argidx;
2781 if (PyTuple_Check(orig_args) && argidx > 0) {
2782 PyObject *v;
2783 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2784 v = PyTuple_New(n);
2785 if (v == NULL)
2786 goto error;
2787 while (--n >= 0) {
2788 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2789 Py_INCREF(w);
2790 PyTuple_SET_ITEM(v, n, w);
2791 }
2792 args = v;
2793 } else {
2794 Py_INCREF(orig_args);
2795 args = orig_args;
2796 }
2797 /* Paste rest of format string to what we have of the result
2798 string; we reuse result for this */
2799 rescnt = res - PyString_AS_STRING(result);
2800 fmtcnt = PyString_GET_SIZE(format) - \
2801 (fmt - PyString_AS_STRING(format));
2802 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2803 Py_DECREF(args);
2804 goto error;
2805 }
2806 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2807 format = result;
2808 /* Let Unicode do its magic */
2809 result = PyUnicode_Format(format, args);
2810 Py_DECREF(format);
2811 Py_DECREF(args);
2812 return result;
2813
Guido van Rossume5372401993-03-16 12:15:04 +00002814 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002815 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002816 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002817 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00002818 }
Guido van Rossume5372401993-03-16 12:15:04 +00002819 return NULL;
2820}
Guido van Rossum2a61e741997-01-18 07:55:05 +00002821
2822
2823#ifdef INTERN_STRINGS
2824
2825static PyObject *interned;
2826
2827void
2828PyString_InternInPlace(p)
2829 PyObject **p;
2830{
2831 register PyStringObject *s = (PyStringObject *)(*p);
2832 PyObject *t;
2833 if (s == NULL || !PyString_Check(s))
2834 Py_FatalError("PyString_InternInPlace: strings only please!");
2835 if ((t = s->ob_sinterned) != NULL) {
2836 if (t == (PyObject *)s)
2837 return;
2838 Py_INCREF(t);
2839 *p = t;
2840 Py_DECREF(s);
2841 return;
2842 }
2843 if (interned == NULL) {
2844 interned = PyDict_New();
2845 if (interned == NULL)
2846 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00002847 }
2848 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2849 Py_INCREF(t);
2850 *p = s->ob_sinterned = t;
2851 Py_DECREF(s);
2852 return;
2853 }
2854 t = (PyObject *)s;
2855 if (PyDict_SetItem(interned, t, t) == 0) {
2856 s->ob_sinterned = t;
2857 return;
2858 }
2859 PyErr_Clear();
2860}
2861
2862
2863PyObject *
2864PyString_InternFromString(cp)
2865 const char *cp;
2866{
2867 PyObject *s = PyString_FromString(cp);
2868 if (s == NULL)
2869 return NULL;
2870 PyString_InternInPlace(&s);
2871 return s;
2872}
2873
2874#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002875
2876void
2877PyString_Fini()
2878{
2879 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00002880 for (i = 0; i < UCHAR_MAX + 1; i++) {
2881 Py_XDECREF(characters[i]);
2882 characters[i] = NULL;
2883 }
2884#ifndef DONT_SHARE_SHORT_STRINGS
2885 Py_XDECREF(nullstring);
2886 nullstring = NULL;
2887#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00002888#ifdef INTERN_STRINGS
2889 if (interned) {
2890 int pos, changed;
2891 PyObject *key, *value;
2892 do {
2893 changed = 0;
2894 pos = 0;
2895 while (PyDict_Next(interned, &pos, &key, &value)) {
2896 if (key->ob_refcnt == 2 && key == value) {
2897 PyDict_DelItem(interned, key);
2898 changed = 1;
2899 }
2900 }
2901 } while (changed);
2902 }
2903#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00002904}