blob: 3b4ddafc113cf85efede0162866f38d0a4f10be7 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00007/* Our own locale-independent ctype.h-like macros */
8/* XXX Move into a header file? */
9
/* Bit flags stored in ctype_table[]; one bit per character class. */
#define FLAG_LOWER  0x01
#define FLAG_UPPER  0x02
#define FLAG_ALPHA  (FLAG_LOWER|FLAG_UPPER)
#define FLAG_DIGIT  0x04
#define FLAG_ALNUM  (FLAG_ALPHA|FLAG_DIGIT)
#define FLAG_SPACE  0x08
#define FLAG_XDIGIT 0x10

/* Classification flags for every possible byte value (index 0..255).
   Only 7-bit ASCII characters carry flags; every entry from 0x80 up is 0.
   The table is never written to, so it is declared const. */
static const unsigned int ctype_table[256] = {
    0, /* 0x0 '\x00' */
    0, /* 0x1 '\x01' */
    0, /* 0x2 '\x02' */
    0, /* 0x3 '\x03' */
    0, /* 0x4 '\x04' */
    0, /* 0x5 '\x05' */
    0, /* 0x6 '\x06' */
    0, /* 0x7 '\x07' */
    0, /* 0x8 '\x08' */
    FLAG_SPACE, /* 0x9 '\t' */
    FLAG_SPACE, /* 0xa '\n' */
    FLAG_SPACE, /* 0xb '\v' */
    FLAG_SPACE, /* 0xc '\f' */
    FLAG_SPACE, /* 0xd '\r' */
    0, /* 0xe '\x0e' */
    0, /* 0xf '\x0f' */
    0, /* 0x10 '\x10' */
    0, /* 0x11 '\x11' */
    0, /* 0x12 '\x12' */
    0, /* 0x13 '\x13' */
    0, /* 0x14 '\x14' */
    0, /* 0x15 '\x15' */
    0, /* 0x16 '\x16' */
    0, /* 0x17 '\x17' */
    0, /* 0x18 '\x18' */
    0, /* 0x19 '\x19' */
    0, /* 0x1a '\x1a' */
    0, /* 0x1b '\x1b' */
    0, /* 0x1c '\x1c' */
    0, /* 0x1d '\x1d' */
    0, /* 0x1e '\x1e' */
    0, /* 0x1f '\x1f' */
    FLAG_SPACE, /* 0x20 ' ' */
    0, /* 0x21 '!' */
    0, /* 0x22 '"' */
    0, /* 0x23 '#' */
    0, /* 0x24 '$' */
    0, /* 0x25 '%' */
    0, /* 0x26 '&' */
    0, /* 0x27 "'" */
    0, /* 0x28 '(' */
    0, /* 0x29 ')' */
    0, /* 0x2a '*' */
    0, /* 0x2b '+' */
    0, /* 0x2c ',' */
    0, /* 0x2d '-' */
    0, /* 0x2e '.' */
    0, /* 0x2f '/' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x30 '0' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x31 '1' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x32 '2' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x33 '3' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x34 '4' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x35 '5' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x36 '6' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x37 '7' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x38 '8' */
    FLAG_DIGIT|FLAG_XDIGIT, /* 0x39 '9' */
    0, /* 0x3a ':' */
    0, /* 0x3b ';' */
    0, /* 0x3c '<' */
    0, /* 0x3d '=' */
    0, /* 0x3e '>' */
    0, /* 0x3f '?' */
    0, /* 0x40 '@' */
    FLAG_UPPER|FLAG_XDIGIT, /* 0x41 'A' */
    FLAG_UPPER|FLAG_XDIGIT, /* 0x42 'B' */
    FLAG_UPPER|FLAG_XDIGIT, /* 0x43 'C' */
    FLAG_UPPER|FLAG_XDIGIT, /* 0x44 'D' */
    FLAG_UPPER|FLAG_XDIGIT, /* 0x45 'E' */
    FLAG_UPPER|FLAG_XDIGIT, /* 0x46 'F' */
    FLAG_UPPER, /* 0x47 'G' */
    FLAG_UPPER, /* 0x48 'H' */
    FLAG_UPPER, /* 0x49 'I' */
    FLAG_UPPER, /* 0x4a 'J' */
    FLAG_UPPER, /* 0x4b 'K' */
    FLAG_UPPER, /* 0x4c 'L' */
    FLAG_UPPER, /* 0x4d 'M' */
    FLAG_UPPER, /* 0x4e 'N' */
    FLAG_UPPER, /* 0x4f 'O' */
    FLAG_UPPER, /* 0x50 'P' */
    FLAG_UPPER, /* 0x51 'Q' */
    FLAG_UPPER, /* 0x52 'R' */
    FLAG_UPPER, /* 0x53 'S' */
    FLAG_UPPER, /* 0x54 'T' */
    FLAG_UPPER, /* 0x55 'U' */
    FLAG_UPPER, /* 0x56 'V' */
    FLAG_UPPER, /* 0x57 'W' */
    FLAG_UPPER, /* 0x58 'X' */
    FLAG_UPPER, /* 0x59 'Y' */
    FLAG_UPPER, /* 0x5a 'Z' */
    0, /* 0x5b '[' */
    0, /* 0x5c '\\' */
    0, /* 0x5d ']' */
    0, /* 0x5e '^' */
    0, /* 0x5f '_' */
    0, /* 0x60 '`' */
    FLAG_LOWER|FLAG_XDIGIT, /* 0x61 'a' */
    FLAG_LOWER|FLAG_XDIGIT, /* 0x62 'b' */
    FLAG_LOWER|FLAG_XDIGIT, /* 0x63 'c' */
    FLAG_LOWER|FLAG_XDIGIT, /* 0x64 'd' */
    FLAG_LOWER|FLAG_XDIGIT, /* 0x65 'e' */
    FLAG_LOWER|FLAG_XDIGIT, /* 0x66 'f' */
    FLAG_LOWER, /* 0x67 'g' */
    FLAG_LOWER, /* 0x68 'h' */
    FLAG_LOWER, /* 0x69 'i' */
    FLAG_LOWER, /* 0x6a 'j' */
    FLAG_LOWER, /* 0x6b 'k' */
    FLAG_LOWER, /* 0x6c 'l' */
    FLAG_LOWER, /* 0x6d 'm' */
    FLAG_LOWER, /* 0x6e 'n' */
    FLAG_LOWER, /* 0x6f 'o' */
    FLAG_LOWER, /* 0x70 'p' */
    FLAG_LOWER, /* 0x71 'q' */
    FLAG_LOWER, /* 0x72 'r' */
    FLAG_LOWER, /* 0x73 's' */
    FLAG_LOWER, /* 0x74 't' */
    FLAG_LOWER, /* 0x75 'u' */
    FLAG_LOWER, /* 0x76 'v' */
    FLAG_LOWER, /* 0x77 'w' */
    FLAG_LOWER, /* 0x78 'x' */
    FLAG_LOWER, /* 0x79 'y' */
    FLAG_LOWER, /* 0x7a 'z' */
    0, /* 0x7b '{' */
    0, /* 0x7c '|' */
    0, /* 0x7d '}' */
    0, /* 0x7e '~' */
    0, /* 0x7f '\x7f' */
    /* 0x80 .. 0xff: no non-ASCII byte belongs to any class */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
156
/* Character-class predicates backed by ctype_table[].  Each yields a
   non-zero value when the byte `c' (masked through Py_CHARMASK) carries
   the corresponding flag, and 0 otherwise. */
#define ISLOWER(c)  (ctype_table[Py_CHARMASK(c)] & FLAG_LOWER)
#define ISUPPER(c)  (ctype_table[Py_CHARMASK(c)] & FLAG_UPPER)
#define ISALPHA(c)  (ctype_table[Py_CHARMASK(c)] & FLAG_ALPHA)
#define ISDIGIT(c)  (ctype_table[Py_CHARMASK(c)] & FLAG_DIGIT)
#define ISXDIGIT(c) (ctype_table[Py_CHARMASK(c)] & FLAG_XDIGIT)
#define ISALNUM(c)  (ctype_table[Py_CHARMASK(c)] & FLAG_ALNUM)
#define ISSPACE(c)  (ctype_table[Py_CHARMASK(c)] & FLAG_SPACE)

/* Redirect the <ctype.h> names to undefined_*() identifiers, which are not
   declared anywhere in view -- presumably so that accidental use of the
   locale-dependent versions in this file fails to build. */
#undef islower
#define islower(c) undefined_islower(c)
#undef isupper
#define isupper(c) undefined_isupper(c)
#undef isalpha
#define isalpha(c) undefined_isalpha(c)
#undef isdigit
#define isdigit(c) undefined_isdigit(c)
#undef isxdigit
#define isxdigit(c) undefined_isxdigit(c)
#undef isalnum
#define isalnum(c) undefined_isalnum(c)
#undef isspace
#define isspace(c) undefined_isspace(c)
179
/* Byte-to-byte lower-casing map: 'A'..'Z' (0x41..0x5a) map to 'a'..'z';
   every other byte value maps to itself.  Read-only, hence const. */
static const unsigned char ctype_tolower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
214
/* Byte-to-byte upper-casing map: 'a'..'z' (0x61..0x7a) map to 'A'..'Z';
   every other byte value maps to itself.  Read-only, hence const. */
static const unsigned char ctype_toupper[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
249
/* Case conversion through the lookup tables above; `c' is masked with
   Py_CHARMASK before it is used as an index. */
#define TOLOWER(c) (ctype_tolower[Py_CHARMASK(c)])
#define TOUPPER(c) (ctype_toupper[Py_CHARMASK(c)])

/* Redirect the <ctype.h> names to undefined_*() identifiers, which are not
   declared anywhere in view -- presumably so accidental use fails to build. */
#undef tolower
#define tolower(c) undefined_tolower(c)
#undef toupper
#define toupper(c) undefined_toupper(c)
Guido van Rossum013142a1994-08-30 08:19:36 +0000257
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000258#ifdef COUNT_ALLOCS
259int null_strings, one_strings;
260#endif
261
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000262static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000263static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000264
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000265/* This dictionary holds all interned strings. Note that references to
266 strings in this dictionary are *not* counted in the string's ob_refcnt.
267 When the interned string reaches a refcnt of 0 the string deallocation
268 function will delete the reference from this dictionary.
269
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000270 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000271 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
272*/
273static PyObject *interned;
274
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000275/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000276 For both PyString_FromString() and PyString_FromStringAndSize(), the
277 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +0000278 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +0000279
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000280 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +0000281 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +0000282
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000283 For PyString_FromStringAndSize(), the parameter the parameter `str' is
284 either NULL or else points to a string containing at least `size' bytes.
285 For PyString_FromStringAndSize(), the string in the `str' parameter does
286 not have to be null-terminated. (Therefore it is safe to construct a
287 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
288 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
289 bytes (setting the last byte to the null terminating character) and you can
290 fill in the data yourself. If `str' is non-NULL then the resulting
291 PyString object must be treated as immutable and you must not fill in nor
292 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +0000293
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000294 The PyObject member `op->ob_size', which denotes the number of "extra
295 items" in a variable-size object, will contain the number of bytes
296 allocated for string data, not counting the null terminating character. It
297 is therefore equal to the equal to the `size' parameter (for
298 PyString_FromStringAndSize()) or the length of the string in the `str'
299 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000300*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000301PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000302PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303{
Tim Peters9e897f42001-05-09 07:37:07 +0000304 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +0000305 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000306 if (size == 0 && (op = nullstring) != NULL) {
307#ifdef COUNT_ALLOCS
308 null_strings++;
309#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000310 Py_INCREF(op);
311 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000312 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000313 if (size == 1 && str != NULL &&
314 (op = characters[*str & UCHAR_MAX]) != NULL)
315 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000316#ifdef COUNT_ALLOCS
317 one_strings++;
318#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000319 Py_INCREF(op);
320 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000321 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000322
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000323 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000324 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000325 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000326 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000327 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000328 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000329 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000330 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000331 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000332 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +0000333 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000334 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000335 PyObject *t = (PyObject *)op;
336 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000337 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000338 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000339 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000340 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000341 PyObject *t = (PyObject *)op;
342 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000343 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000344 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000345 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000346 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000347 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348}
349
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000350PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000351PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000352{
Tim Peters62de65b2001-12-06 20:29:32 +0000353 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000354 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000355
356 assert(str != NULL);
357 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000358 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000359 PyErr_SetString(PyExc_OverflowError,
360 "string is too long for a Python string");
361 return NULL;
362 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000363 if (size == 0 && (op = nullstring) != NULL) {
364#ifdef COUNT_ALLOCS
365 null_strings++;
366#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000367 Py_INCREF(op);
368 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000369 }
370 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
371#ifdef COUNT_ALLOCS
372 one_strings++;
373#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000374 Py_INCREF(op);
375 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000376 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000377
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000378 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000379 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000380 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000381 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000382 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000383 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000384 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000385 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000386 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000387 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000388 PyObject *t = (PyObject *)op;
389 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000390 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000391 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000392 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000393 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000394 PyObject *t = (PyObject *)op;
395 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000396 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000397 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000398 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000399 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000400 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401}
402
Barry Warsawdadace02001-08-24 18:32:06 +0000403PyObject *
404PyString_FromFormatV(const char *format, va_list vargs)
405{
Tim Petersc15c4f12001-10-02 21:32:07 +0000406 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000407 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000408 const char* f;
409 char *s;
410 PyObject* string;
411
Tim Petersc15c4f12001-10-02 21:32:07 +0000412#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000413 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000414#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000415#ifdef __va_copy
416 __va_copy(count, vargs);
417#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000418 count = vargs;
419#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000420#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000421 /* step 1: figure out how large a buffer we need */
422 for (f = format; *f; f++) {
423 if (*f == '%') {
424 const char* p = f;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000425 while (*++f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000426 ;
427
Thomas Wouters477c8d52006-05-27 19:21:47 +0000428 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
429 * they don't affect the amount of space we reserve.
430 */
431 if ((*f == 'l' || *f == 'z') &&
432 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000433 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000434
Barry Warsawdadace02001-08-24 18:32:06 +0000435 switch (*f) {
436 case 'c':
437 (void)va_arg(count, int);
438 /* fall through... */
439 case '%':
440 n++;
441 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000442 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000443 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000444 /* 20 bytes is enough to hold a 64-bit
445 integer. Decimal takes the most space.
446 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000447 n += 20;
448 break;
449 case 's':
450 s = va_arg(count, char*);
451 n += strlen(s);
452 break;
453 case 'p':
454 (void) va_arg(count, int);
455 /* maximum 64-bit pointer representation:
456 * 0xffffffffffffffff
457 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000458 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000459 */
460 n += 19;
461 break;
462 default:
463 /* if we stumble upon an unknown
464 formatting code, copy the rest of
465 the format string to the output
466 string. (we cannot just skip the
467 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000468 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000469 n += strlen(p);
470 goto expand;
471 }
472 } else
473 n++;
474 }
475 expand:
476 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000477 /* Since we've analyzed how much space we need for the worst case,
478 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000479 string = PyString_FromStringAndSize(NULL, n);
480 if (!string)
481 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000482
Barry Warsawdadace02001-08-24 18:32:06 +0000483 s = PyString_AsString(string);
484
485 for (f = format; *f; f++) {
486 if (*f == '%') {
487 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000488 Py_ssize_t i;
489 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000490 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000491 /* parse the width.precision part (we're only
492 interested in the precision value, if any) */
493 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000494 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000495 n = (n*10) + *f++ - '0';
496 if (*f == '.') {
497 f++;
498 n = 0;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000499 while (ISDIGIT(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000500 n = (n*10) + *f++ - '0';
501 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000502 while (*f && *f != '%' && !ISALPHA(*f))
Barry Warsawdadace02001-08-24 18:32:06 +0000503 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000504 /* handle the long flag, but only for %ld and %lu.
505 others can be added when necessary. */
506 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000507 longflag = 1;
508 ++f;
509 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000510 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000511 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000512 size_tflag = 1;
513 ++f;
514 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000515
Barry Warsawdadace02001-08-24 18:32:06 +0000516 switch (*f) {
517 case 'c':
518 *s++ = va_arg(vargs, int);
519 break;
520 case 'd':
521 if (longflag)
522 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000523 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000524 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
525 va_arg(vargs, Py_ssize_t));
526 else
527 sprintf(s, "%d", va_arg(vargs, int));
528 s += strlen(s);
529 break;
530 case 'u':
531 if (longflag)
532 sprintf(s, "%lu",
533 va_arg(vargs, unsigned long));
534 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000535 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
536 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000537 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000538 sprintf(s, "%u",
539 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000540 s += strlen(s);
541 break;
542 case 'i':
543 sprintf(s, "%i", va_arg(vargs, int));
544 s += strlen(s);
545 break;
546 case 'x':
547 sprintf(s, "%x", va_arg(vargs, int));
548 s += strlen(s);
549 break;
550 case 's':
551 p = va_arg(vargs, char*);
552 i = strlen(p);
553 if (n > 0 && i > n)
554 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000555 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000556 s += i;
557 break;
558 case 'p':
559 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000560 /* %p is ill-defined: ensure leading 0x. */
561 if (s[1] == 'X')
562 s[1] = 'x';
563 else if (s[1] != 'x') {
564 memmove(s+2, s, strlen(s)+1);
565 s[0] = '0';
566 s[1] = 'x';
567 }
Barry Warsawdadace02001-08-24 18:32:06 +0000568 s += strlen(s);
569 break;
570 case '%':
571 *s++ = '%';
572 break;
573 default:
574 strcpy(s, p);
575 s += strlen(s);
576 goto end;
577 }
578 } else
579 *s++ = *f;
580 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000581
Barry Warsawdadace02001-08-24 18:32:06 +0000582 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000583 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000584 return string;
585}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000586
Barry Warsawdadace02001-08-24 18:32:06 +0000587PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000588PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000589{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000590 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000591 va_list vargs;
592
593#ifdef HAVE_STDARG_PROTOTYPES
594 va_start(vargs, format);
595#else
596 va_start(vargs);
597#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000598 ret = PyString_FromFormatV(format, vargs);
599 va_end(vargs);
600 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000601}
602
603
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000604PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000605 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000606 const char *encoding,
607 const char *errors)
608{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000609 PyObject *v, *str;
610
611 str = PyString_FromStringAndSize(s, size);
612 if (str == NULL)
613 return NULL;
614 v = PyString_AsDecodedString(str, encoding, errors);
615 Py_DECREF(str);
616 return v;
617}
618
619PyObject *PyString_AsDecodedObject(PyObject *str,
620 const char *encoding,
621 const char *errors)
622{
623 PyObject *v;
624
625 if (!PyString_Check(str)) {
626 PyErr_BadArgument();
627 goto onError;
628 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000629
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000630 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000631 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000632 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000633
634 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000635 v = PyCodec_Decode(str, encoding, errors);
636 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000637 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000638
639 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000640
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000641 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000642 return NULL;
643}
644
645PyObject *PyString_AsDecodedString(PyObject *str,
646 const char *encoding,
647 const char *errors)
648{
649 PyObject *v;
650
651 v = PyString_AsDecodedObject(str, encoding, errors);
652 if (v == NULL)
653 goto onError;
654
655 /* Convert Unicode to a string using the default encoding */
656 if (PyUnicode_Check(v)) {
657 PyObject *temp = v;
658 v = PyUnicode_AsEncodedString(v, NULL, NULL);
659 Py_DECREF(temp);
660 if (v == NULL)
661 goto onError;
662 }
663 if (!PyString_Check(v)) {
664 PyErr_Format(PyExc_TypeError,
665 "decoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000666 Py_Type(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000667 Py_DECREF(v);
668 goto onError;
669 }
670
671 return v;
672
673 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000674 return NULL;
675}
676
677PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000678 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000679 const char *encoding,
680 const char *errors)
681{
682 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000683
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000684 str = PyString_FromStringAndSize(s, size);
685 if (str == NULL)
686 return NULL;
687 v = PyString_AsEncodedString(str, encoding, errors);
688 Py_DECREF(str);
689 return v;
690}
691
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000692PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000693 const char *encoding,
694 const char *errors)
695{
696 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000697
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000698 if (!PyString_Check(str)) {
699 PyErr_BadArgument();
700 goto onError;
701 }
702
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000703 if (encoding == NULL) {
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000704 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000705 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000706
707 /* Encode via the codec registry */
708 v = PyCodec_Encode(str, encoding, errors);
709 if (v == NULL)
710 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000711
712 return v;
713
714 onError:
715 return NULL;
716}
717
718PyObject *PyString_AsEncodedString(PyObject *str,
719 const char *encoding,
720 const char *errors)
721{
722 PyObject *v;
723
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000724 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000725 if (v == NULL)
726 goto onError;
727
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000728 /* Convert Unicode to a string using the default encoding */
729 if (PyUnicode_Check(v)) {
730 PyObject *temp = v;
731 v = PyUnicode_AsEncodedString(v, NULL, NULL);
732 Py_DECREF(temp);
733 if (v == NULL)
734 goto onError;
735 }
736 if (!PyString_Check(v)) {
737 PyErr_Format(PyExc_TypeError,
738 "encoder did not return a string object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000739 Py_Type(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000740 Py_DECREF(v);
741 goto onError;
742 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000743
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000744 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000745
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000746 onError:
747 return NULL;
748}
749
Guido van Rossum234f9421993-06-17 12:35:49 +0000750static void
Fred Drakeba096332000-07-09 07:04:36 +0000751string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000752{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000753 switch (PyString_CHECK_INTERNED(op)) {
754 case SSTATE_NOT_INTERNED:
755 break;
756
757 case SSTATE_INTERNED_MORTAL:
758 /* revive dead object temporarily for DelItem */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000759 Py_Refcnt(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000760 if (PyDict_DelItem(interned, op) != 0)
761 Py_FatalError(
762 "deletion of interned string failed");
763 break;
764
765 case SSTATE_INTERNED_IMMORTAL:
766 Py_FatalError("Immortal interned string died.");
767
768 default:
769 Py_FatalError("Inconsistent interned string state.");
770 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000771 Py_Type(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000772}
773
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000774/* Unescape a backslash-escaped string. If unicode is non-zero,
775 the string is a u-literal. If recode_encoding is non-zero,
776 the string is UTF-8 encoded and should be re-encoded in the
777 specified encoding. */
778
779PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000780 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000781 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000782 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000783 const char *recode_encoding)
784{
785 int c;
786 char *p, *buf;
787 const char *end;
788 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000789 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000790 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000791 if (v == NULL)
792 return NULL;
793 p = buf = PyString_AsString(v);
794 end = s + len;
795 while (s < end) {
796 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000797 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000798 if (recode_encoding && (*s & 0x80)) {
799 PyObject *u, *w;
800 char *r;
801 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000802 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000803 t = s;
804 /* Decode non-ASCII bytes as UTF-8. */
805 while (t < end && (*t & 0x80)) t++;
806 u = PyUnicode_DecodeUTF8(s, t - s, errors);
807 if(!u) goto failed;
808
809 /* Recode them in target encoding. */
810 w = PyUnicode_AsEncodedString(
811 u, recode_encoding, errors);
812 Py_DECREF(u);
813 if (!w) goto failed;
814
815 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000816 assert(PyString_Check(w));
817 r = PyString_AS_STRING(w);
818 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000819 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000820 p += rn;
821 Py_DECREF(w);
822 s = t;
823 } else {
824 *p++ = *s++;
825 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000826 continue;
827 }
828 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000829 if (s==end) {
830 PyErr_SetString(PyExc_ValueError,
831 "Trailing \\ in string");
832 goto failed;
833 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000834 switch (*s++) {
835 /* XXX This assumes ASCII! */
836 case '\n': break;
837 case '\\': *p++ = '\\'; break;
838 case '\'': *p++ = '\''; break;
839 case '\"': *p++ = '\"'; break;
840 case 'b': *p++ = '\b'; break;
841 case 'f': *p++ = '\014'; break; /* FF */
842 case 't': *p++ = '\t'; break;
843 case 'n': *p++ = '\n'; break;
844 case 'r': *p++ = '\r'; break;
845 case 'v': *p++ = '\013'; break; /* VT */
846 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
847 case '0': case '1': case '2': case '3':
848 case '4': case '5': case '6': case '7':
849 c = s[-1] - '0';
850 if ('0' <= *s && *s <= '7') {
851 c = (c<<3) + *s++ - '0';
852 if ('0' <= *s && *s <= '7')
853 c = (c<<3) + *s++ - '0';
854 }
855 *p++ = c;
856 break;
857 case 'x':
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000858 if (ISXDIGIT(s[0]) && ISXDIGIT(s[1])) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000859 unsigned int x = 0;
860 c = Py_CHARMASK(*s);
861 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000862 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000863 x = c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000864 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000865 x = 10 + c - 'a';
866 else
867 x = 10 + c - 'A';
868 x = x << 4;
869 c = Py_CHARMASK(*s);
870 s++;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000871 if (ISDIGIT(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000872 x += c - '0';
Guido van Rossum6ccd3f22007-10-09 03:46:30 +0000873 else if (ISLOWER(c))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000874 x += 10 + c - 'a';
875 else
876 x += 10 + c - 'A';
877 *p++ = x;
878 break;
879 }
880 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000881 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000882 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000883 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000884 }
885 if (strcmp(errors, "replace") == 0) {
886 *p++ = '?';
887 } else if (strcmp(errors, "ignore") == 0)
888 /* do nothing */;
889 else {
890 PyErr_Format(PyExc_ValueError,
891 "decoding error; "
892 "unknown error handling code: %.400s",
893 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000894 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000895 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000896 default:
897 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000898 s--;
899 goto non_esc; /* an arbitry number of unescaped
900 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000901 }
902 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000903 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000904 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000905 return v;
906 failed:
907 Py_DECREF(v);
908 return NULL;
909}
910
Thomas Wouters477c8d52006-05-27 19:21:47 +0000911/* -------------------------------------------------------------------- */
912/* object api */
913
Martin v. Löwis18e16552006-02-15 17:27:45 +0000914static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000915string_getsize(register PyObject *op)
916{
917 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000918 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000919 if (PyString_AsStringAndSize(op, &s, &len))
920 return -1;
921 return len;
922}
923
924static /*const*/ char *
925string_getbuffer(register PyObject *op)
926{
927 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000928 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000929 if (PyString_AsStringAndSize(op, &s, &len))
930 return NULL;
931 return s;
932}
933
Martin v. Löwis18e16552006-02-15 17:27:45 +0000934Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000935PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000937 if (PyUnicode_Check(op)) {
938 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
939 if (!op)
940 return -1;
941 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000942 if (!PyString_Check(op))
943 return string_getsize(op);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000944 return Py_Size(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945}
946
947/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000948PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949{
Martin v. Löwis5b222132007-06-10 09:51:05 +0000950 if (PyUnicode_Check(op)) {
951 op = _PyUnicode_AsDefaultEncodedString(op, NULL);
952 if (!op)
953 return NULL;
954 }
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000955 if (!PyString_Check(op))
956 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000958}
959
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000960int
961PyString_AsStringAndSize(register PyObject *obj,
962 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000963 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000964{
965 if (s == NULL) {
966 PyErr_BadInternalCall();
967 return -1;
968 }
969
970 if (!PyString_Check(obj)) {
971 if (PyUnicode_Check(obj)) {
972 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
973 if (obj == NULL)
974 return -1;
975 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000976 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000977 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000978 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +0000979 "expected string, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000980 "%.200s found", Py_Type(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000981 return -1;
982 }
983 }
984
985 *s = PyString_AS_STRING(obj);
986 if (len != NULL)
987 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000988 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000989 PyErr_SetString(PyExc_TypeError,
990 "expected string without null bytes");
991 return -1;
992 }
993 return 0;
994}
995
Thomas Wouters477c8d52006-05-27 19:21:47 +0000996/* -------------------------------------------------------------------- */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000997/* Methods */
998
/* Bind the STRINGLIB_* names to the 8-bit string primitives before the
   stringlib headers are included: char elements, memcmp comparison,
   and the PyString size / constructor / buffer accessors.
   NOTE(review): presumably the headers below are written against these
   names -- confirm in stringlib/. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000999#define STRINGLIB_CHAR char
1000
1001#define STRINGLIB_CMP memcmp
1002#define STRINGLIB_LEN PyString_GET_SIZE
1003#define STRINGLIB_NEW PyString_FromStringAndSize
1004#define STRINGLIB_STR PyString_AS_STRING
1005
1006#define STRINGLIB_EMPTY nullstring
1007
/* fastsearch.h is included ahead of the other stringlib headers. */
1008#include "stringlib/fastsearch.h"
1009
1010#include "stringlib/count.h"
1011#include "stringlib/find.h"
1012#include "stringlib/partition.h"
1013
1014
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001015PyObject *
1016PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001017{
Walter Dörwald1ab83302007-05-18 17:15:44 +00001018 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001019 register PyStringObject* op = (PyStringObject*) obj;
Martin v. Löwis5b222132007-06-10 09:51:05 +00001020 Py_ssize_t length = PyString_GET_SIZE(op);
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00001021 size_t newsize = 3 + 4 * Py_Size(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00001022 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001023 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00001024 PyErr_SetString(PyExc_OverflowError,
1025 "string is too large to make repr");
1026 }
Walter Dörwald1ab83302007-05-18 17:15:44 +00001027 v = PyUnicode_FromUnicode(NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +00001029 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001030 }
1031 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001032 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +00001033 register Py_UNICODE c;
1034 register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +00001035 int quote;
1036
Thomas Wouters7e474022000-07-16 12:04:32 +00001037 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +00001038 quote = '\'';
Walter Dörwald1ab83302007-05-18 17:15:44 +00001039 if (smartquotes) {
Guido van Rossuma1cdfd92007-07-03 14:52:23 +00001040 char *test, *start;
1041 start = PyString_AS_STRING(op);
1042 for (test = start; test < start+length; ++test) {
Walter Dörwald1ab83302007-05-18 17:15:44 +00001043 if (*test == '"') {
1044 quote = '\''; /* switch back to single quote */
1045 goto decided;
1046 }
1047 else if (*test == '\'')
1048 quote = '"';
1049 }
1050 decided:
1051 ;
1052 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +00001053
Guido van Rossum7611d1d2007-06-15 00:00:12 +00001054 *p++ = 's', *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001055 for (i = 0; i < Py_Size(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +00001056 /* There's at least enough room for a hex escape
1057 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +00001058 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +00001060 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001061 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +00001062 else if (c == '\t')
1063 *p++ = '\\', *p++ = 't';
1064 else if (c == '\n')
1065 *p++ = '\\', *p++ = 'n';
1066 else if (c == '\r')
1067 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +00001068 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +00001069 *p++ = '\\';
1070 *p++ = 'x';
1071 *p++ = hexdigits[(c & 0xf0) >> 4];
1072 *p++ = hexdigits[c & 0xf];
Martin v. Löwisfed24052002-10-07 13:55:50 +00001073 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +00001074 else
1075 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001076 }
Walter Dörwald1ab83302007-05-18 17:15:44 +00001077 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +00001078 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001079 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +00001080 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
1081 Py_DECREF(v);
1082 return NULL;
1083 }
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001084 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001085 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086}
1087
Guido van Rossum189f1df2001-05-01 16:51:53 +00001088static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +00001089string_repr(PyObject *op)
1090{
1091 return PyString_Repr(op, 1);
1092}
1093
1094static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +00001095string_str(PyObject *s)
1096{
Tim Petersc9933152001-10-16 20:18:24 +00001097 assert(PyString_Check(s));
1098 if (PyString_CheckExact(s)) {
1099 Py_INCREF(s);
1100 return s;
1101 }
1102 else {
1103 /* Subtype -- return genuine string with the same value. */
1104 PyStringObject *t = (PyStringObject *) s;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001105 return PyString_FromStringAndSize(t->ob_sval, Py_Size(t));
Tim Petersc9933152001-10-16 20:18:24 +00001106 }
Guido van Rossum189f1df2001-05-01 16:51:53 +00001107}
1108
Martin v. Löwis18e16552006-02-15 17:27:45 +00001109static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001110string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001111{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001112 return Py_Size(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001113}
1114
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001115static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001116string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001117{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001118 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001119 register PyStringObject *op;
1120 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001121 if (PyUnicode_Check(bb))
1122 return PyUnicode_Concat((PyObject *)a, bb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00001123 if (PyBytes_Check(bb))
1124 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001125 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00001126 "cannot concatenate 'str8' and '%.200s' objects",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001127 Py_Type(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001128 return NULL;
1129 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001130#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001131 /* Optimize cases with empty left or right operand */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001132 if ((Py_Size(a) == 0 || Py_Size(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +00001133 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001134 if (Py_Size(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001135 Py_INCREF(bb);
1136 return bb;
1137 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001138 Py_INCREF(a);
1139 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001140 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001141 size = Py_Size(a) + Py_Size(b);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001142 if (size < 0) {
1143 PyErr_SetString(PyExc_OverflowError,
1144 "strings are too large to concat");
1145 return NULL;
1146 }
1147
Guido van Rossume3a8e7e2002-08-19 19:26:42 +00001148 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +00001149 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001150 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001151 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001152 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001153 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001154 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001155 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1156 Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001157 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001158 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001159#undef b
1160}
1161
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001162static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001163string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001164{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001165 register Py_ssize_t i;
1166 register Py_ssize_t j;
1167 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001168 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001169 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001170 if (n < 0)
1171 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001172 /* watch out for overflows: the size can overflow int,
1173 * and the # of bytes needed can overflow size_t
1174 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001175 size = Py_Size(a) * n;
1176 if (n && size / n != Py_Size(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001177 PyErr_SetString(PyExc_OverflowError,
1178 "repeated string is too long");
1179 return NULL;
1180 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001181 if (size == Py_Size(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001182 Py_INCREF(a);
1183 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001184 }
Tim Peterse7c05322004-06-27 17:24:49 +00001185 nbytes = (size_t)size;
1186 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001187 PyErr_SetString(PyExc_OverflowError,
1188 "repeated string is too long");
1189 return NULL;
1190 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001191 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001192 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001193 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001194 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001195 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001196 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001197 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001198 op->ob_sval[size] = '\0';
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001199 if (Py_Size(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001200 memset(op->ob_sval, a->ob_sval[0] , n);
1201 return (PyObject *) op;
1202 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001203 i = 0;
1204 if (i < size) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001205 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
1206 i = Py_Size(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001207 }
1208 while (i < size) {
1209 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001210 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001211 i += j;
1212 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001213 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001214}
1215
Guido van Rossum9284a572000-03-07 15:53:43 +00001216static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001217string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001218{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001219 if (!PyString_CheckExact(sub_obj)) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001220 if (PyUnicode_Check(sub_obj))
1221 return PyUnicode_Contains(str_obj, sub_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001222 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001223 PyErr_Format(PyExc_TypeError,
1224 "'in <string>' requires string as left operand, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001225 "not %.200s", Py_Type(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001226 return -1;
1227 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001228 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001229
Thomas Wouters477c8d52006-05-27 19:21:47 +00001230 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001231}
1232
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001233static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001234string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001235{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001236 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001237 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001238 if (i < 0 || i >= Py_Size(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001239 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001240 return NULL;
1241 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001242 pchar = a->ob_sval[i];
1243 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001244 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001245 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001246 else {
1247#ifdef COUNT_ALLOCS
1248 one_strings++;
1249#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001250 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001251 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001252 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001253}
1254
Martin v. Löwiscd353062001-05-24 16:56:35 +00001255static PyObject*
1256string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001257{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001258 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001259 Py_ssize_t len_a, len_b;
1260 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001261 PyObject *result;
1262
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001263 /* Make sure both arguments are strings. */
1264 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001265 result = Py_NotImplemented;
1266 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001267 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001268 if (a == b) {
1269 switch (op) {
1270 case Py_EQ:case Py_LE:case Py_GE:
1271 result = Py_True;
1272 goto out;
1273 case Py_NE:case Py_LT:case Py_GT:
1274 result = Py_False;
1275 goto out;
1276 }
1277 }
1278 if (op == Py_EQ) {
1279 /* Supporting Py_NE here as well does not save
1280 much time, since Py_NE is rarely used. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001281 if (Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001282 && (a->ob_sval[0] == b->ob_sval[0]
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001283 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001284 result = Py_True;
1285 } else {
1286 result = Py_False;
1287 }
1288 goto out;
1289 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001290 len_a = Py_Size(a); len_b = Py_Size(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001291 min_len = (len_a < len_b) ? len_a : len_b;
1292 if (min_len > 0) {
1293 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1294 if (c==0)
1295 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001296 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001297 c = 0;
1298 if (c == 0)
1299 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1300 switch (op) {
1301 case Py_LT: c = c < 0; break;
1302 case Py_LE: c = c <= 0; break;
1303 case Py_EQ: assert(0); break; /* unreachable */
1304 case Py_NE: c = c != 0; break;
1305 case Py_GT: c = c > 0; break;
1306 case Py_GE: c = c >= 0; break;
1307 default:
1308 result = Py_NotImplemented;
1309 goto out;
1310 }
1311 result = c ? Py_True : Py_False;
1312 out:
1313 Py_INCREF(result);
1314 return result;
1315}
1316
1317int
1318_PyString_Eq(PyObject *o1, PyObject *o2)
1319{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001320 PyStringObject *a = (PyStringObject*) o1;
1321 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001322 return Py_Size(a) == Py_Size(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001323 && *a->ob_sval == *b->ob_sval
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001324 && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001325}
1326
Guido van Rossum9bfef441993-03-29 10:43:31 +00001327static long
Fred Drakeba096332000-07-09 07:04:36 +00001328string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001329{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001330 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001331 register unsigned char *p;
1332 register long x;
1333
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001334 if (a->ob_shash != -1)
1335 return a->ob_shash;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001336 len = Py_Size(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001337 p = (unsigned char *) a->ob_sval;
1338 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001339 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001340 x = (1000003*x) ^ *p++;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001341 x ^= Py_Size(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001342 if (x == -1)
1343 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001344 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001345 return x;
1346}
1347
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001348static PyObject*
1349string_subscript(PyStringObject* self, PyObject* item)
1350{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001351 if (PyIndex_Check(item)) {
1352 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001353 if (i == -1 && PyErr_Occurred())
1354 return NULL;
1355 if (i < 0)
1356 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001357 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001358 }
1359 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001360 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001361 char* source_buf;
1362 char* result_buf;
1363 PyObject* result;
1364
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001365 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001366 PyString_GET_SIZE(self),
1367 &start, &stop, &step, &slicelength) < 0) {
1368 return NULL;
1369 }
1370
1371 if (slicelength <= 0) {
1372 return PyString_FromStringAndSize("", 0);
1373 }
Thomas Woutersed03b412007-08-28 21:37:11 +00001374 else if (start == 0 && step == 1 &&
1375 slicelength == PyString_GET_SIZE(self) &&
1376 PyString_CheckExact(self)) {
1377 Py_INCREF(self);
1378 return (PyObject *)self;
1379 }
1380 else if (step == 1) {
1381 return PyString_FromStringAndSize(
1382 PyString_AS_STRING(self) + start,
1383 slicelength);
1384 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001385 else {
1386 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001387 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001388 if (result_buf == NULL)
1389 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001390
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001391 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001392 cur += step, i++) {
1393 result_buf[i] = source_buf[cur];
1394 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001395
1396 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001397 slicelength);
1398 PyMem_Free(result_buf);
1399 return result;
1400 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001401 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001402 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001403 PyErr_Format(PyExc_TypeError,
1404 "string indices must be integers, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001405 Py_Type(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001406 return NULL;
1407 }
1408}
1409
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001410static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001411string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001412{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001413 return PyBuffer_FillInfo(view, (void *)self->ob_sval, Py_Size(self), 0, flags);
Guido van Rossum1db70701998-10-08 02:18:52 +00001414}
1415
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001416static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001417 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001418 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001419 (ssizeargfunc)string_repeat, /*sq_repeat*/
1420 (ssizeargfunc)string_item, /*sq_item*/
Thomas Woutersd2cf20e2007-08-30 22:57:53 +00001421 0, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001422 0, /*sq_ass_item*/
1423 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001424 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001425};
1426
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001427static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001428 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001429 (binaryfunc)string_subscript,
1430 0,
1431};
1432
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001433static PyBufferProcs string_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001434 (getbufferproc)string_buffer_getbuffer,
1435 NULL,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001436};
1437
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438
/* Strip-mode selectors.  Their values double as indices into
   stripformat[] below. */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439#define LEFTSTRIP 0
1440#define RIGHTSTRIP 1
1441#define BOTHSTRIP 2
1442
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001443/* Format strings indexed by the strip-mode constants above; each one
   starts with the three characters "|O:" followed by the method name. */
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001444static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1445
/* Method name for strip mode i: the format string with its three-character
   "|O:" prefix skipped. */
1446#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001447
Thomas Wouters477c8d52006-05-27 19:21:47 +00001448
1449/* True when `length` bytes of target starting at offset equal pattern.
   The first and last bytes are compared directly and the bytes from
   index 1 onward are compared with memcmp over length-2 bytes.
   Don't call if length < 2 (length-2 would be negative). */
1450#define Py_STRING_MATCH(target, offset, pattern, length) \
1451 (target[offset] == pattern[0] && \
1452 target[offset+length-1] == pattern[length-1] && \
1453 !memcmp(target+offset+1, pattern+1, length-2) )
1454
1455
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to maxsplit splits: maxsplit+1
   elements (5 splits gives 6 elements), capped at MAX_PREALLOC.  The
   argument is parenthesized so expression arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1468
/* Append the substring data[left:right] to `list` as a new string object.
   The expansion site must provide the variables `str` and `list` and an
   `onError` label; control jumps to onError when creating the string or
   appending it fails.  The reference held in `str` is released once the
   append has been attempted.
   The body is brace-enclosed (like SPLIT_ADD) so the whole sequence acts as
   one statement, e.g. under an unbraced `if`.  The `else Py_DECREF(str);`
   form is kept as written in the original. */
#define SPLIT_APPEND(data, left, right) {                               \
    str = PyString_FromStringAndSize((data) + (left),                   \
                                     (right) - (left));                 \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    if (PyList_Append(list, str)) {                                     \
        Py_DECREF(str);                                                 \
        goto onError;                                                   \
    }                                                                   \
    else                                                                \
        Py_DECREF(str); }
1480
/* Store the substring data[left:right] as the next element of `list`.
   The expansion site must provide `str`, `list`, `count` and an `onError`
   label.  While count is below MAX_PREALLOC the new string is written
   straight into slot `count` of the list; after that it is appended and the
   local reference released.  Jumps to onError on any failure; otherwise
   count is incremented. */
#define SPLIT_ADD(data, left, right) {                                  \
    str = PyString_FromStringAndSize((data) + (left),                   \
                                     (right) - (left));                 \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    if (count >= MAX_PREALLOC) {                                        \
        int append_failed_ = PyList_Append(list, str);                  \
        Py_DECREF(str);                                                 \
        if (append_failed_)                                             \
            goto onError;                                               \
    }                                                                   \
    else {                                                              \
        PyList_SET_ITEM(list, count, str);                              \
    }                                                                   \
    count++; }

/* Always force the list to the expected size: sets the list's size field to
   the `count` variable of the expansion site. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Thomas Wouters477c8d52006-05-27 19:21:47 +00001500
/* Cursor helpers for whitespace splitting.  `i` must be a modifiable index
   variable; it is advanced (SKIP_*) or moved back (RSKIP_*) in place.
     SKIP_SPACE / SKIP_NONSPACE   stop at the first non-space / space
                                  character, or at len.
     RSKIP_SPACE / RSKIP_NONSPACE stop at the first non-space / space
                                  character going backwards, or at -1.
   Arguments are parenthesized so expression arguments expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i) < (len) && ISSPACE((s)[(i)])) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i) < (len) && !ISSPACE((s)[(i)])) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i) >= 0 && ISSPACE((s)[(i)])) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i) >= 0 && !ISSPACE((s)[(i)])) (i)--; }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001505
1506Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001507split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001508{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001509 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001511 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512
1513 if (list == NULL)
1514 return NULL;
1515
Thomas Wouters477c8d52006-05-27 19:21:47 +00001516 i = j = 0;
1517
1518 while (maxsplit-- > 0) {
1519 SKIP_SPACE(s, i, len);
1520 if (i==len) break;
1521 j = i; i++;
1522 SKIP_NONSPACE(s, i, len);
1523 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001525
1526 if (i < len) {
1527 /* Only occurs when maxsplit was reached */
1528 /* Skip any remaining whitespace and copy to end of string */
1529 SKIP_SPACE(s, i, len);
1530 if (i != len)
1531 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001532 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001533 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001534 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001535 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001536 Py_DECREF(list);
1537 return NULL;
1538}
1539
Thomas Wouters477c8d52006-05-27 19:21:47 +00001540Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001541split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001542{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001543 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001544 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001545 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001546
1547 if (list == NULL)
1548 return NULL;
1549
Thomas Wouters477c8d52006-05-27 19:21:47 +00001550 i = j = 0;
1551 while ((j < len) && (maxcount-- > 0)) {
1552 for(; j<len; j++) {
1553 /* I found that using memchr makes no difference */
1554 if (s[j] == ch) {
1555 SPLIT_ADD(s, i, j);
1556 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001557 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001558 }
1559 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001560 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001561 if (i <= len) {
1562 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001563 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001564 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001565 return list;
1566
1567 onError:
1568 Py_DECREF(list);
1569 return NULL;
1570}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001572PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573"S.split([sep [,maxsplit]]) -> list of strings\n\
1574\n\
1575Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001576delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001577splits are done. If sep is not specified or is None, any\n\
1578whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579
1580static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001581string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001583 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001584 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001585 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001586 PyObject *list, *str, *subobj = Py_None;
1587#ifdef USE_FAST
1588 Py_ssize_t pos;
1589#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001591 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001593 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001594 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001595 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001597 if (PyString_Check(subobj)) {
1598 sub = PyString_AS_STRING(subobj);
1599 n = PyString_GET_SIZE(subobj);
1600 }
1601 else if (PyUnicode_Check(subobj))
1602 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1603 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1604 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001605
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001606 if (n == 0) {
1607 PyErr_SetString(PyExc_ValueError, "empty separator");
1608 return NULL;
1609 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001610 else if (n == 1)
1611 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612
Thomas Wouters477c8d52006-05-27 19:21:47 +00001613 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 if (list == NULL)
1615 return NULL;
1616
Thomas Wouters477c8d52006-05-27 19:21:47 +00001617#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001619 while (maxsplit-- > 0) {
1620 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1621 if (pos < 0)
1622 break;
1623 j = i+pos;
1624 SPLIT_ADD(s, i, j);
1625 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001627#else
1628 i = j = 0;
1629 while ((j+n <= len) && (maxsplit-- > 0)) {
1630 for (; j+n <= len; j++) {
1631 if (Py_STRING_MATCH(s, j, sub, n)) {
1632 SPLIT_ADD(s, i, j);
1633 i = j = j + n;
1634 break;
1635 }
1636 }
1637 }
1638#endif
1639 SPLIT_ADD(s, i, len);
1640 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 return list;
1642
Thomas Wouters477c8d52006-05-27 19:21:47 +00001643 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001644 Py_DECREF(list);
1645 return NULL;
1646}
1647
Thomas Wouters477c8d52006-05-27 19:21:47 +00001648PyDoc_STRVAR(partition__doc__,
1649"S.partition(sep) -> (head, sep, tail)\n\
1650\n\
1651Searches for the separator sep in S, and returns the part before it,\n\
1652the separator itself, and the part after it. If the separator is not\n\
1653found, returns S and two empty strings.");
1654
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001655static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001656string_partition(PyStringObject *self, PyObject *sep_obj)
1657{
1658 const char *sep;
1659 Py_ssize_t sep_len;
1660
1661 if (PyString_Check(sep_obj)) {
1662 sep = PyString_AS_STRING(sep_obj);
1663 sep_len = PyString_GET_SIZE(sep_obj);
1664 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001665 else if (PyUnicode_Check(sep_obj))
1666 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001667 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1668 return NULL;
1669
1670 return stringlib_partition(
1671 (PyObject*) self,
1672 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1673 sep_obj, sep, sep_len
1674 );
1675}
1676
1677PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001678"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001679\n\
1680Searches for the separator sep in S, starting at the end of S, and returns\n\
1681the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001682separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001683
1684static PyObject *
1685string_rpartition(PyStringObject *self, PyObject *sep_obj)
1686{
1687 const char *sep;
1688 Py_ssize_t sep_len;
1689
1690 if (PyString_Check(sep_obj)) {
1691 sep = PyString_AS_STRING(sep_obj);
1692 sep_len = PyString_GET_SIZE(sep_obj);
1693 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001694 else if (PyUnicode_Check(sep_obj))
1695 return PyUnicode_Partition((PyObject *) self, sep_obj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001696 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1697 return NULL;
1698
1699 return stringlib_rpartition(
1700 (PyObject*) self,
1701 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1702 sep_obj, sep, sep_len
1703 );
1704}
1705
1706Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001707rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001708{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001709 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001710 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001711 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001712
1713 if (list == NULL)
1714 return NULL;
1715
Thomas Wouters477c8d52006-05-27 19:21:47 +00001716 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001717
Thomas Wouters477c8d52006-05-27 19:21:47 +00001718 while (maxsplit-- > 0) {
1719 RSKIP_SPACE(s, i);
1720 if (i<0) break;
1721 j = i; i--;
1722 RSKIP_NONSPACE(s, i);
1723 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001724 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001725 if (i >= 0) {
1726 /* Only occurs when maxsplit was reached */
1727 /* Skip any remaining whitespace and copy to beginning of string */
1728 RSKIP_SPACE(s, i);
1729 if (i >= 0)
1730 SPLIT_ADD(s, 0, i + 1);
1731
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001732 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001733 FIX_PREALLOC_SIZE(list);
1734 if (PyList_Reverse(list) < 0)
1735 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001736 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001737 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001738 Py_DECREF(list);
1739 return NULL;
1740}
1741
Thomas Wouters477c8d52006-05-27 19:21:47 +00001742Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001743rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001744{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001745 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001746 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001747 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001748
1749 if (list == NULL)
1750 return NULL;
1751
Thomas Wouters477c8d52006-05-27 19:21:47 +00001752 i = j = len - 1;
1753 while ((i >= 0) && (maxcount-- > 0)) {
1754 for (; i >= 0; i--) {
1755 if (s[i] == ch) {
1756 SPLIT_ADD(s, i + 1, j + 1);
1757 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001758 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001759 }
1760 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001761 }
1762 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001763 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001764 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001765 FIX_PREALLOC_SIZE(list);
1766 if (PyList_Reverse(list) < 0)
1767 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001768 return list;
1769
1770 onError:
1771 Py_DECREF(list);
1772 return NULL;
1773}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001774
1775PyDoc_STRVAR(rsplit__doc__,
1776"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1777\n\
1778Return a list of the words in the string S, using sep as the\n\
1779delimiter string, starting at the end of the string and working\n\
1780to the front. If maxsplit is given, at most maxsplit splits are\n\
1781done. If sep is not specified or is None, any whitespace string\n\
1782is a separator.");
1783
1784static PyObject *
1785string_rsplit(PyStringObject *self, PyObject *args)
1786{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001787 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001788 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001789 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001790 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001791
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001792 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001793 return NULL;
1794 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001795 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001796 if (subobj == Py_None)
1797 return rsplit_whitespace(s, len, maxsplit);
1798 if (PyString_Check(subobj)) {
1799 sub = PyString_AS_STRING(subobj);
1800 n = PyString_GET_SIZE(subobj);
1801 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001802 else if (PyUnicode_Check(subobj))
1803 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001804 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1805 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001806
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001807 if (n == 0) {
1808 PyErr_SetString(PyExc_ValueError, "empty separator");
1809 return NULL;
1810 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001811 else if (n == 1)
1812 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001813
Thomas Wouters477c8d52006-05-27 19:21:47 +00001814 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001815 if (list == NULL)
1816 return NULL;
1817
1818 j = len;
1819 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001820
Thomas Wouters477c8d52006-05-27 19:21:47 +00001821 while ( (i >= 0) && (maxsplit-- > 0) ) {
1822 for (; i>=0; i--) {
1823 if (Py_STRING_MATCH(s, i, sub, n)) {
1824 SPLIT_ADD(s, i + n, j);
1825 j = i;
1826 i -= n;
1827 break;
1828 }
1829 }
1830 }
1831 SPLIT_ADD(s, 0, j);
1832 FIX_PREALLOC_SIZE(list);
1833 if (PyList_Reverse(list) < 0)
1834 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001835 return list;
1836
Thomas Wouters477c8d52006-05-27 19:21:47 +00001837onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001838 Py_DECREF(list);
1839 return NULL;
1840}
1841
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001843PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844"S.join(sequence) -> string\n\
1845\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001846Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001847sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848
1849static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001850string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851{
1852 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001853 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001854 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001856 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001857 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001858 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001859 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001860
Tim Peters19fe14e2001-01-19 03:03:47 +00001861 seq = PySequence_Fast(orig, "");
1862 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001863 return NULL;
1864 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001865
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001866 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001867 if (seqlen == 0) {
1868 Py_DECREF(seq);
1869 return PyString_FromString("");
1870 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001871 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001872 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001873 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1874 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001875 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001876 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001877 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001879
Raymond Hettinger674f2412004-08-23 23:23:54 +00001880 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001881 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001882 * Do a pre-pass to figure out the total amount of space we'll
1883 * need (sz), see whether any argument is absurd, and defer to
1884 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001885 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001886 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001887 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001888 item = PySequence_Fast_GET_ITEM(seq, i);
1889 if (!PyString_Check(item)){
1890 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001891 /* Defer to Unicode join.
1892 * CAUTION: There's no gurantee that the
1893 * original sequence can be iterated over
1894 * again, so we must pass seq here.
1895 */
1896 PyObject *result;
1897 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001898 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001899 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001900 }
1901 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001902 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001903 " %.80s found",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001904 i, Py_Type(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001905 Py_DECREF(seq);
1906 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001907 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001908 sz += PyString_GET_SIZE(item);
1909 if (i != 0)
1910 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001911 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001912 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001913 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001914 Py_DECREF(seq);
1915 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001917 }
1918
1919 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001920 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001921 if (res == NULL) {
1922 Py_DECREF(seq);
1923 return NULL;
1924 }
1925
1926 /* Catenate everything. */
1927 p = PyString_AS_STRING(res);
1928 for (i = 0; i < seqlen; ++i) {
1929 size_t n;
1930 item = PySequence_Fast_GET_ITEM(seq, i);
1931 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001932 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001933 p += n;
1934 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001935 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001936 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001937 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001939
Jeremy Hylton49048292000-07-11 03:28:17 +00001940 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942}
1943
Tim Peters52e155e2001-06-16 05:42:57 +00001944PyObject *
1945_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001946{
Tim Petersa7259592001-06-16 05:11:17 +00001947 assert(sep != NULL && PyString_Check(sep));
1948 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001949 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001950}
1951
Thomas Wouters477c8d52006-05-27 19:21:47 +00001952Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001953string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001954{
1955 if (*end > len)
1956 *end = len;
1957 else if (*end < 0)
1958 *end += len;
1959 if (*end < 0)
1960 *end = 0;
1961 if (*start < 0)
1962 *start += len;
1963 if (*start < 0)
1964 *start = 0;
1965}
1966
Thomas Wouters477c8d52006-05-27 19:21:47 +00001967Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001968string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001970 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001971 const char *sub;
1972 Py_ssize_t sub_len;
1973 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974
Thomas Wouters477c8d52006-05-27 19:21:47 +00001975 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1976 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001977 return -2;
1978 if (PyString_Check(subobj)) {
1979 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001980 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001981 }
1982 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001983 return PyUnicode_Find(
1984 (PyObject *)self, subobj, start, end, dir);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001985 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001986 /* XXX - the "expected a character buffer object" is pretty
1987 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988 return -2;
1989
Thomas Wouters477c8d52006-05-27 19:21:47 +00001990 if (dir > 0)
1991 return stringlib_find_slice(
1992 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1993 sub, sub_len, start, end);
1994 else
1995 return stringlib_rfind_slice(
1996 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1997 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998}
1999
2000
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002001PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002"S.find(sub [,start [,end]]) -> int\n\
2003\n\
2004Return the lowest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00002005such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006arguments start and end are interpreted as in slice notation.\n\
2007\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002008Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009
2010static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002011string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002013 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014 if (result == -2)
2015 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002016 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017}
2018
2019
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002020PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021"S.index(sub [,start [,end]]) -> int\n\
2022\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002023Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024
2025static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002026string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002028 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029 if (result == -2)
2030 return NULL;
2031 if (result == -1) {
2032 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002033 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034 return NULL;
2035 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002036 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037}
2038
2039
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002040PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041"S.rfind(sub [,start [,end]]) -> int\n\
2042\n\
2043Return the highest index in S where substring sub is found,\n\
Guido van Rossum806c2462007-08-06 23:33:07 +00002044such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045arguments start and end are interpreted as in slice notation.\n\
2046\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002047Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048
2049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002050string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002052 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053 if (result == -2)
2054 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002055 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002056}
2057
2058
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002059PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060"S.rindex(sub [,start [,end]]) -> int\n\
2061\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002062Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063
2064static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002065string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002067 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002068 if (result == -2)
2069 return NULL;
2070 if (result == -1) {
2071 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002072 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002073 return NULL;
2074 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002075 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076}
2077
2078
Thomas Wouters477c8d52006-05-27 19:21:47 +00002079Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002080do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2081{
2082 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002083 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002084 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002085 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2086 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002087
2088 i = 0;
2089 if (striptype != RIGHTSTRIP) {
2090 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2091 i++;
2092 }
2093 }
2094
2095 j = len;
2096 if (striptype != LEFTSTRIP) {
2097 do {
2098 j--;
2099 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2100 j++;
2101 }
2102
2103 if (i == 0 && j == len && PyString_CheckExact(self)) {
2104 Py_INCREF(self);
2105 return (PyObject*)self;
2106 }
2107 else
2108 return PyString_FromStringAndSize(s+i, j-i);
2109}
2110
2111
Thomas Wouters477c8d52006-05-27 19:21:47 +00002112Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002113do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114{
2115 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002116 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118 i = 0;
2119 if (striptype != RIGHTSTRIP) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002120 while (i < len && ISSPACE(s[i])) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121 i++;
2122 }
2123 }
2124
2125 j = len;
2126 if (striptype != LEFTSTRIP) {
2127 do {
2128 j--;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002129 } while (j >= i && ISSPACE(s[j]));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130 j++;
2131 }
2132
Tim Peters8fa5dd02001-09-12 02:18:30 +00002133 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134 Py_INCREF(self);
2135 return (PyObject*)self;
2136 }
2137 else
2138 return PyString_FromStringAndSize(s+i, j-i);
2139}
2140
2141
Thomas Wouters477c8d52006-05-27 19:21:47 +00002142Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002143do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2144{
2145 PyObject *sep = NULL;
2146
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002147 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002148 return NULL;
2149
2150 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002151 if (PyString_Check(sep))
2152 return do_xstrip(self, striptype, sep);
2153 else if (PyUnicode_Check(sep)) {
2154 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2155 PyObject *res;
2156 if (uniself==NULL)
2157 return NULL;
2158 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2159 striptype, sep);
2160 Py_DECREF(uniself);
2161 return res;
2162 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002163 PyErr_Format(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002164 "%s arg must be None or string",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002165 STRIPNAME(striptype));
2166 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002167 }
2168
2169 return do_strip(self, striptype);
2170}
2171
2172
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002173PyDoc_STRVAR(strip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002174"S.strip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175\n\
2176Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002177whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002178If chars is given and not None, remove characters in chars instead.\n\
2179If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180
2181static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002182string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002184 if (PyTuple_GET_SIZE(args) == 0)
2185 return do_strip(self, BOTHSTRIP); /* Common case */
2186 else
2187 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188}
2189
2190
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002191PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002192"S.lstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002194Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002195If chars is given and not None, remove characters in chars instead.\n\
2196If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197
2198static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002199string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002200{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002201 if (PyTuple_GET_SIZE(args) == 0)
2202 return do_strip(self, LEFTSTRIP); /* Common case */
2203 else
2204 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205}
2206
2207
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002208PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00002209"S.rstrip([chars]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002211Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002212If chars is given and not None, remove characters in chars instead.\n\
2213If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214
2215static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002216string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002218 if (PyTuple_GET_SIZE(args) == 0)
2219 return do_strip(self, RIGHTSTRIP); /* Common case */
2220 else
2221 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222}
2223
2224
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002225PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226"S.lower() -> string\n\
2227\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002228Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229
2230static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002231string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002233 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002234 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002235 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002237 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002238 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002240
2241 s = PyString_AS_STRING(newobj);
2242
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002243 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002244
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002246 int c = Py_CHARMASK(s[i]);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002247 if (ISUPPER(c))
2248 s[i] = TOLOWER(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002249 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002250
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002251 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252}
2253
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002254PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255"S.upper() -> string\n\
2256\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002257Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258
2259static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002260string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002262 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002263 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002264 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002266 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002267 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002269
2270 s = PyString_AS_STRING(newobj);
2271
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002272 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002273
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002275 int c = Py_CHARMASK(s[i]);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002276 if (ISLOWER(c))
2277 s[i] = TOUPPER(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002279
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002280 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281}
2282
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002283PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284"S.title() -> string\n\
2285\n\
2286Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002287characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002288
2289static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002290string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002291{
2292 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002293 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002295 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002297 newobj = PyString_FromStringAndSize(NULL, n);
2298 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002299 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002300 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301 for (i = 0; i < n; i++) {
2302 int c = Py_CHARMASK(*s++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002303 if (ISLOWER(c)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 if (!previous_is_cased)
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002305 c = TOUPPER(c);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306 previous_is_cased = 1;
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002307 } else if (ISUPPER(c)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 if (previous_is_cased)
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002309 c = TOLOWER(c);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002310 previous_is_cased = 1;
2311 } else
2312 previous_is_cased = 0;
2313 *s_new++ = c;
2314 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002315 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002316}
2317
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002318PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319"S.capitalize() -> string\n\
2320\n\
2321Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002322capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323
2324static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002325string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326{
2327 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002328 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002329 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002331 newobj = PyString_FromStringAndSize(NULL, n);
2332 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002334 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335 if (0 < n) {
2336 int c = Py_CHARMASK(*s++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002337 if (ISLOWER(c))
2338 *s_new = TOUPPER(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339 else
2340 *s_new = c;
2341 s_new++;
2342 }
2343 for (i = 1; i < n; i++) {
2344 int c = Py_CHARMASK(*s++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002345 if (ISUPPER(c))
2346 *s_new = TOLOWER(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347 else
2348 *s_new = c;
2349 s_new++;
2350 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002351 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352}
2353
2354
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002355PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356"S.count(sub[, start[, end]]) -> int\n\
2357\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002358Return the number of non-overlapping occurrences of substring sub in\n\
2359string S[start:end]. Optional arguments start and end are interpreted\n\
2360as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361
2362static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002363string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002365 PyObject *sub_obj;
2366 const char *str = PyString_AS_STRING(self), *sub;
2367 Py_ssize_t sub_len;
2368 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369
Thomas Wouters477c8d52006-05-27 19:21:47 +00002370 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2371 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002373
Thomas Wouters477c8d52006-05-27 19:21:47 +00002374 if (PyString_Check(sub_obj)) {
2375 sub = PyString_AS_STRING(sub_obj);
2376 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002377 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002378 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002379 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002380 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002381 if (count == -1)
2382 return NULL;
2383 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002384 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002385 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002386 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387 return NULL;
2388
Thomas Wouters477c8d52006-05-27 19:21:47 +00002389 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002390
Thomas Wouters477c8d52006-05-27 19:21:47 +00002391 return PyInt_FromSsize_t(
2392 stringlib_count(str + start, end - start, sub, sub_len)
2393 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394}
2395
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002396PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002397"S.swapcase() -> string\n\
2398\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002399Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002400converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401
2402static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002403string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404{
2405 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002406 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002407 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002409 newobj = PyString_FromStringAndSize(NULL, n);
2410 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002412 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413 for (i = 0; i < n; i++) {
2414 int c = Py_CHARMASK(*s++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002415 if (ISLOWER(c)) {
2416 *s_new = TOUPPER(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002417 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00002418 else if (ISUPPER(c)) {
2419 *s_new = TOLOWER(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002420 }
2421 else
2422 *s_new = c;
2423 s_new++;
2424 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002425 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426}
2427
2428
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002429PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430"S.translate(table [,deletechars]) -> string\n\
2431\n\
2432Return a copy of the string S, where all characters occurring\n\
2433in the optional argument deletechars are removed, and the\n\
2434remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002435translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436
2437static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002438string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002440 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002441 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002442 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002444 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002445 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 PyObject *result;
2447 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002450 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002451 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002452 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002453
2454 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002455 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002456 tablen = PyString_GET_SIZE(tableobj);
2457 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002458 else if (tableobj == Py_None) {
2459 table = NULL;
2460 tablen = 256;
2461 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002462 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002463 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002464 parameter; instead a mapping to None will cause characters
2465 to be deleted. */
2466 if (delobj != NULL) {
2467 PyErr_SetString(PyExc_TypeError,
2468 "deletions are implemented differently for unicode");
2469 return NULL;
2470 }
2471 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2472 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002473 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002474 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002475
Martin v. Löwis00b61272002-12-12 20:03:19 +00002476 if (tablen != 256) {
2477 PyErr_SetString(PyExc_ValueError,
2478 "translation table must be 256 characters long");
2479 return NULL;
2480 }
2481
Guido van Rossum4c08d552000-03-10 22:55:18 +00002482 if (delobj != NULL) {
2483 if (PyString_Check(delobj)) {
2484 del_table = PyString_AS_STRING(delobj);
2485 dellen = PyString_GET_SIZE(delobj);
2486 }
2487 else if (PyUnicode_Check(delobj)) {
2488 PyErr_SetString(PyExc_TypeError,
2489 "deletions are implemented differently for unicode");
2490 return NULL;
2491 }
2492 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2493 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002494 }
2495 else {
2496 del_table = NULL;
2497 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002498 }
2499
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002500 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501 result = PyString_FromStringAndSize((char *)NULL, inlen);
2502 if (result == NULL)
2503 return NULL;
2504 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002505 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506
Guido van Rossumd8faa362007-04-27 19:54:29 +00002507 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508 /* If no deletions are required, use faster code */
2509 for (i = inlen; --i >= 0; ) {
2510 c = Py_CHARMASK(*input++);
2511 if (Py_CHARMASK((*output++ = table[c])) != c)
2512 changed = 1;
2513 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002514 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002515 return result;
2516 Py_DECREF(result);
2517 Py_INCREF(input_obj);
2518 return input_obj;
2519 }
2520
Guido van Rossumd8faa362007-04-27 19:54:29 +00002521 if (table == NULL) {
2522 for (i = 0; i < 256; i++)
2523 trans_table[i] = Py_CHARMASK(i);
2524 } else {
2525 for (i = 0; i < 256; i++)
2526 trans_table[i] = Py_CHARMASK(table[i]);
2527 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002528
2529 for (i = 0; i < dellen; i++)
2530 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2531
2532 for (i = inlen; --i >= 0; ) {
2533 c = Py_CHARMASK(*input++);
2534 if (trans_table[c] != -1)
2535 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2536 continue;
2537 changed = 1;
2538 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002539 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002540 Py_DECREF(result);
2541 Py_INCREF(input_obj);
2542 return input_obj;
2543 }
2544 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002545 if (inlen > 0)
2546 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547 return result;
2548}
2549
2550
/* Search direction constants: a positive value scans left to right,
   a negative value scans right to left. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at it, or NULL when absent. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002558
Thomas Wouters477c8d52006-05-27 19:21:47 +00002559/* String ops must return a string. */
2560/* If the object is subclass of string, create a copy */
2561Py_LOCAL(PyStringObject *)
2562return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002564 if (PyString_CheckExact(self)) {
2565 Py_INCREF(self);
2566 return self;
2567 }
2568 return (PyStringObject *)PyString_FromStringAndSize(
2569 PyString_AS_STRING(self),
2570 PyString_GET_SIZE(self));
2571}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002572
Thomas Wouters477c8d52006-05-27 19:21:47 +00002573Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002574countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002575{
2576 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002577 const char *start=target;
2578 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002579
Thomas Wouters477c8d52006-05-27 19:21:47 +00002580 while ( (start=findchar(start, end-start, c)) != NULL ) {
2581 count++;
2582 if (count >= maxcount)
2583 break;
2584 start += 1;
2585 }
2586 return count;
2587}
2588
2589Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002590findstring(const char *target, Py_ssize_t target_len,
2591 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002592 Py_ssize_t start,
2593 Py_ssize_t end,
2594 int direction)
2595{
2596 if (start < 0) {
2597 start += target_len;
2598 if (start < 0)
2599 start = 0;
2600 }
2601 if (end > target_len) {
2602 end = target_len;
2603 } else if (end < 0) {
2604 end += target_len;
2605 if (end < 0)
2606 end = 0;
2607 }
2608
2609 /* zero-length substrings always match at the first attempt */
2610 if (pattern_len == 0)
2611 return (direction > 0) ? start : end;
2612
2613 end -= pattern_len;
2614
2615 if (direction < 0) {
2616 for (; end >= start; end--)
2617 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2618 return end;
2619 } else {
2620 for (; start <= end; start++)
2621 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2622 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002623 }
2624 return -1;
2625}
2626
Thomas Wouters477c8d52006-05-27 19:21:47 +00002627Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002628countstring(const char *target, Py_ssize_t target_len,
2629 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002630 Py_ssize_t start,
2631 Py_ssize_t end,
2632 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002633{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002634 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002635
Thomas Wouters477c8d52006-05-27 19:21:47 +00002636 if (start < 0) {
2637 start += target_len;
2638 if (start < 0)
2639 start = 0;
2640 }
2641 if (end > target_len) {
2642 end = target_len;
2643 } else if (end < 0) {
2644 end += target_len;
2645 if (end < 0)
2646 end = 0;
2647 }
2648
2649 /* zero-length substrings match everywhere */
2650 if (pattern_len == 0 || maxcount == 0) {
2651 if (target_len+1 < maxcount)
2652 return target_len+1;
2653 return maxcount;
2654 }
2655
2656 end -= pattern_len;
2657 if (direction < 0) {
2658 for (; (end >= start); end--)
2659 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2660 count++;
2661 if (--maxcount <= 0) break;
2662 end -= pattern_len-1;
2663 }
2664 } else {
2665 for (; (start <= end); start++)
2666 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2667 count++;
2668 if (--maxcount <= 0)
2669 break;
2670 start += pattern_len-1;
2671 }
2672 }
2673 return count;
2674}
2675
2676
2677/* Algorithms for different cases of string replacement */
2678
2679/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2680Py_LOCAL(PyStringObject *)
2681replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002682 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002683 Py_ssize_t maxcount)
2684{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002685 char *self_s, *result_s;
2686 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002687 Py_ssize_t count, i, product;
2688 PyStringObject *result;
2689
2690 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002691
Thomas Wouters477c8d52006-05-27 19:21:47 +00002692 /* 1 at the end plus 1 after every character */
2693 count = self_len+1;
2694 if (maxcount < count)
2695 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002696
Thomas Wouters477c8d52006-05-27 19:21:47 +00002697 /* Check for overflow */
2698 /* result_len = count * to_len + self_len; */
2699 product = count * to_len;
2700 if (product / to_len != count) {
2701 PyErr_SetString(PyExc_OverflowError,
2702 "replace string is too long");
2703 return NULL;
2704 }
2705 result_len = product + self_len;
2706 if (result_len < 0) {
2707 PyErr_SetString(PyExc_OverflowError,
2708 "replace string is too long");
2709 return NULL;
2710 }
2711
2712 if (! (result = (PyStringObject *)
2713 PyString_FromStringAndSize(NULL, result_len)) )
2714 return NULL;
2715
2716 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002717 result_s = PyString_AS_STRING(result);
2718
2719 /* TODO: special case single character, which doesn't need memcpy */
2720
2721 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002722 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002723 result_s += to_len;
2724 count -= 1;
2725
2726 for (i=0; i<count; i++) {
2727 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002728 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002729 result_s += to_len;
2730 }
2731
2732 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002733 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002734
2735 return result;
2736}
2737
2738/* Special case for deleting a single character */
2739/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2740Py_LOCAL(PyStringObject *)
2741replace_delete_single_character(PyStringObject *self,
2742 char from_c, Py_ssize_t maxcount)
2743{
2744 char *self_s, *result_s;
2745 char *start, *next, *end;
2746 Py_ssize_t self_len, result_len;
2747 Py_ssize_t count;
2748 PyStringObject *result;
2749
2750 self_len = PyString_GET_SIZE(self);
2751 self_s = PyString_AS_STRING(self);
2752
2753 count = countchar(self_s, self_len, from_c, maxcount);
2754 if (count == 0) {
2755 return return_self(self);
2756 }
2757
2758 result_len = self_len - count; /* from_len == 1 */
2759 assert(result_len>=0);
2760
2761 if ( (result = (PyStringObject *)
2762 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2763 return NULL;
2764 result_s = PyString_AS_STRING(result);
2765
2766 start = self_s;
2767 end = self_s + self_len;
2768 while (count-- > 0) {
2769 next = findchar(start, end-start, from_c);
2770 if (next == NULL)
2771 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002772 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002773 result_s += (next-start);
2774 start = next+1;
2775 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002776 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002777
Thomas Wouters477c8d52006-05-27 19:21:47 +00002778 return result;
2779}
2780
2781/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2782
2783Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002784replace_delete_substring(PyStringObject *self,
2785 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002786 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002787 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002788 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002789 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002790 Py_ssize_t count, offset;
2791 PyStringObject *result;
2792
2793 self_len = PyString_GET_SIZE(self);
2794 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002795
2796 count = countstring(self_s, self_len,
2797 from_s, from_len,
2798 0, self_len, 1,
2799 maxcount);
2800
2801 if (count == 0) {
2802 /* no matches */
2803 return return_self(self);
2804 }
2805
2806 result_len = self_len - (count * from_len);
2807 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002808
Thomas Wouters477c8d52006-05-27 19:21:47 +00002809 if ( (result = (PyStringObject *)
2810 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2811 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002812
Thomas Wouters477c8d52006-05-27 19:21:47 +00002813 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002814
Thomas Wouters477c8d52006-05-27 19:21:47 +00002815 start = self_s;
2816 end = self_s + self_len;
2817 while (count-- > 0) {
2818 offset = findstring(start, end-start,
2819 from_s, from_len,
2820 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002821 if (offset == -1)
2822 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002823 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002824
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002825 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002826
Thomas Wouters477c8d52006-05-27 19:21:47 +00002827 result_s += (next-start);
2828 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002829 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002830 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002831 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002832}
2833
Thomas Wouters477c8d52006-05-27 19:21:47 +00002834/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2835Py_LOCAL(PyStringObject *)
2836replace_single_character_in_place(PyStringObject *self,
2837 char from_c, char to_c,
2838 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002839{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002840 char *self_s, *result_s, *start, *end, *next;
2841 Py_ssize_t self_len;
2842 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002843
Thomas Wouters477c8d52006-05-27 19:21:47 +00002844 /* The result string will be the same size */
2845 self_s = PyString_AS_STRING(self);
2846 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002847
Thomas Wouters477c8d52006-05-27 19:21:47 +00002848 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002849
Thomas Wouters477c8d52006-05-27 19:21:47 +00002850 if (next == NULL) {
2851 /* No matches; return the original string */
2852 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002853 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002854
Thomas Wouters477c8d52006-05-27 19:21:47 +00002855 /* Need to make a new string */
2856 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2857 if (result == NULL)
2858 return NULL;
2859 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002860 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002861
Thomas Wouters477c8d52006-05-27 19:21:47 +00002862 /* change everything in-place, starting with this one */
2863 start = result_s + (next-self_s);
2864 *start = to_c;
2865 start++;
2866 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002867
Thomas Wouters477c8d52006-05-27 19:21:47 +00002868 while (--maxcount > 0) {
2869 next = findchar(start, end-start, from_c);
2870 if (next == NULL)
2871 break;
2872 *next = to_c;
2873 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002874 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002875
Thomas Wouters477c8d52006-05-27 19:21:47 +00002876 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002877}
2878
Thomas Wouters477c8d52006-05-27 19:21:47 +00002879/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2880Py_LOCAL(PyStringObject *)
2881replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002882 const char *from_s, Py_ssize_t from_len,
2883 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002884 Py_ssize_t maxcount)
2885{
2886 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002887 char *self_s;
2888 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002889 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002890
Thomas Wouters477c8d52006-05-27 19:21:47 +00002891 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002892
Thomas Wouters477c8d52006-05-27 19:21:47 +00002893 self_s = PyString_AS_STRING(self);
2894 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002895
Thomas Wouters477c8d52006-05-27 19:21:47 +00002896 offset = findstring(self_s, self_len,
2897 from_s, from_len,
2898 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002899 if (offset == -1) {
2900 /* No matches; return the original string */
2901 return return_self(self);
2902 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002903
Thomas Wouters477c8d52006-05-27 19:21:47 +00002904 /* Need to make a new string */
2905 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2906 if (result == NULL)
2907 return NULL;
2908 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002909 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002910
Thomas Wouters477c8d52006-05-27 19:21:47 +00002911 /* change everything in-place, starting with this one */
2912 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002913 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002914 start += from_len;
2915 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002916
Thomas Wouters477c8d52006-05-27 19:21:47 +00002917 while ( --maxcount > 0) {
2918 offset = findstring(start, end-start,
2919 from_s, from_len,
2920 0, end-start, FORWARD);
2921 if (offset==-1)
2922 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002923 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002924 start += offset+from_len;
2925 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002926
Thomas Wouters477c8d52006-05-27 19:21:47 +00002927 return result;
2928}
2929
2930/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2931Py_LOCAL(PyStringObject *)
2932replace_single_character(PyStringObject *self,
2933 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002934 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002935 Py_ssize_t maxcount)
2936{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002937 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002938 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002939 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002940 Py_ssize_t count, product;
2941 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002942
Thomas Wouters477c8d52006-05-27 19:21:47 +00002943 self_s = PyString_AS_STRING(self);
2944 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002945
Thomas Wouters477c8d52006-05-27 19:21:47 +00002946 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002947 if (count == 0) {
2948 /* no matches, return unchanged */
2949 return return_self(self);
2950 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002951
Thomas Wouters477c8d52006-05-27 19:21:47 +00002952 /* use the difference between current and new, hence the "-1" */
2953 /* result_len = self_len + count * (to_len-1) */
2954 product = count * (to_len-1);
2955 if (product / (to_len-1) != count) {
2956 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2957 return NULL;
2958 }
2959 result_len = self_len + product;
2960 if (result_len < 0) {
2961 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2962 return NULL;
2963 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002964
Thomas Wouters477c8d52006-05-27 19:21:47 +00002965 if ( (result = (PyStringObject *)
2966 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2967 return NULL;
2968 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002969
Thomas Wouters477c8d52006-05-27 19:21:47 +00002970 start = self_s;
2971 end = self_s + self_len;
2972 while (count-- > 0) {
2973 next = findchar(start, end-start, from_c);
2974 if (next == NULL)
2975 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002976
Thomas Wouters477c8d52006-05-27 19:21:47 +00002977 if (next == start) {
2978 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002979 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002980 result_s += to_len;
2981 start += 1;
2982 } else {
2983 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002984 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002985 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002986 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002987 result_s += to_len;
2988 start = next+1;
2989 }
2990 }
2991 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002992 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002993
Thomas Wouters477c8d52006-05-27 19:21:47 +00002994 return result;
2995}
2996
2997/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2998Py_LOCAL(PyStringObject *)
2999replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003000 const char *from_s, Py_ssize_t from_len,
3001 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003002 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003003 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003004 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003005 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003006 Py_ssize_t count, offset, product;
3007 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003008
Thomas Wouters477c8d52006-05-27 19:21:47 +00003009 self_s = PyString_AS_STRING(self);
3010 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003011
Thomas Wouters477c8d52006-05-27 19:21:47 +00003012 count = countstring(self_s, self_len,
3013 from_s, from_len,
3014 0, self_len, FORWARD, maxcount);
3015 if (count == 0) {
3016 /* no matches, return unchanged */
3017 return return_self(self);
3018 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003019
Thomas Wouters477c8d52006-05-27 19:21:47 +00003020 /* Check for overflow */
3021 /* result_len = self_len + count * (to_len-from_len) */
3022 product = count * (to_len-from_len);
3023 if (product / (to_len-from_len) != count) {
3024 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3025 return NULL;
3026 }
3027 result_len = self_len + product;
3028 if (result_len < 0) {
3029 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3030 return NULL;
3031 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003032
Thomas Wouters477c8d52006-05-27 19:21:47 +00003033 if ( (result = (PyStringObject *)
3034 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3035 return NULL;
3036 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003037
Thomas Wouters477c8d52006-05-27 19:21:47 +00003038 start = self_s;
3039 end = self_s + self_len;
3040 while (count-- > 0) {
3041 offset = findstring(start, end-start,
3042 from_s, from_len,
3043 0, end-start, FORWARD);
3044 if (offset == -1)
3045 break;
3046 next = start+offset;
3047 if (next == start) {
3048 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003049 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003050 result_s += to_len;
3051 start += from_len;
3052 } else {
3053 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003054 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003055 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003056 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003057 result_s += to_len;
3058 start = next+from_len;
3059 }
3060 }
3061 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003062 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003063
Thomas Wouters477c8d52006-05-27 19:21:47 +00003064 return result;
3065}
3066
3067
3068Py_LOCAL(PyStringObject *)
3069replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003070 const char *from_s, Py_ssize_t from_len,
3071 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003072 Py_ssize_t maxcount)
3073{
Thomas Wouters477c8d52006-05-27 19:21:47 +00003074 if (maxcount < 0) {
3075 maxcount = PY_SSIZE_T_MAX;
3076 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3077 /* nothing to do; return the original string */
3078 return return_self(self);
3079 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003080
Thomas Wouters477c8d52006-05-27 19:21:47 +00003081 if (maxcount == 0 ||
3082 (from_len == 0 && to_len == 0)) {
3083 /* nothing to do; return the original string */
3084 return return_self(self);
3085 }
3086
3087 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003088
Thomas Wouters477c8d52006-05-27 19:21:47 +00003089 if (from_len == 0) {
3090 /* insert the 'to' string everywhere. */
3091 /* >>> "Python".replace("", ".") */
3092 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003093 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003094 }
3095
3096 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3097 /* point for an empty self string to generate a non-empty string */
3098 /* Special case so the remaining code always gets a non-empty string */
3099 if (PyString_GET_SIZE(self) == 0) {
3100 return return_self(self);
3101 }
3102
3103 if (to_len == 0) {
3104 /* delete all occurances of 'from' string */
3105 if (from_len == 1) {
3106 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003107 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003108 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003109 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003110 }
3111 }
3112
3113 /* Handle special case where both strings have the same length */
3114
3115 if (from_len == to_len) {
3116 if (from_len == 1) {
3117 return replace_single_character_in_place(
3118 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003119 from_s[0],
3120 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00003121 maxcount);
3122 } else {
3123 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003124 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003125 }
3126 }
3127
3128 /* Otherwise use the more generic algorithms */
3129 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003130 return replace_single_character(self, from_s[0],
3131 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003132 } else {
3133 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003134 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003135 }
3136}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003138PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003139"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003140\n\
3141Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003142old replaced by new. If the optional argument count is\n\
3143given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144
3145static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003146string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003148 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003149 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003150 const char *from_s, *to_s;
3151 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152
Thomas Wouters477c8d52006-05-27 19:21:47 +00003153 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003154 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155
Thomas Wouters477c8d52006-05-27 19:21:47 +00003156 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003157 from_s = PyString_AS_STRING(from);
3158 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003160 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003161 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003162 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003163 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003164 return NULL;
3165
Thomas Wouters477c8d52006-05-27 19:21:47 +00003166 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003167 to_s = PyString_AS_STRING(to);
3168 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00003170 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003171 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003172 from, to, count);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003173 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003174 return NULL;
3175
Thomas Wouters477c8d52006-05-27 19:21:47 +00003176 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003177 from_s, from_len,
3178 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179}
3180
Thomas Wouters477c8d52006-05-27 19:21:47 +00003181/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003182
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003183/* Matches the end (direction >= 0) or start (direction < 0) of self
3184 * against substr, using the start and end arguments. Returns
3185 * -1 on error, 0 if not found and 1 if found.
3186 */
3187Py_LOCAL(int)
3188_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3189 Py_ssize_t end, int direction)
3190{
3191 Py_ssize_t len = PyString_GET_SIZE(self);
3192 Py_ssize_t slen;
3193 const char* sub;
3194 const char* str;
3195
3196 if (PyString_Check(substr)) {
3197 sub = PyString_AS_STRING(substr);
3198 slen = PyString_GET_SIZE(substr);
3199 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003200 else if (PyUnicode_Check(substr))
3201 return PyUnicode_Tailmatch((PyObject *)self,
3202 substr, start, end, direction);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003203 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3204 return -1;
3205 str = PyString_AS_STRING(self);
3206
3207 string_adjust_indices(&start, &end, len);
3208
3209 if (direction < 0) {
3210 /* startswith */
3211 if (start+slen > len)
3212 return 0;
3213 } else {
3214 /* endswith */
3215 if (end-start < slen || start > len)
3216 return 0;
3217
3218 if (end-slen > start)
3219 start = end - slen;
3220 }
3221 if (end-start >= slen)
3222 return ! memcmp(str+start, sub, slen);
3223 return 0;
3224}
3225
3226
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003227PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003228"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003229\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003230Return True if S starts with the specified prefix, False otherwise.\n\
3231With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003232With optional end, stop comparing S at that position.\n\
3233prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003234
3235static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003236string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003237{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003238 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003239 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003240 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003241 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003242
Guido van Rossumc6821402000-05-08 14:08:05 +00003243 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3244 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003245 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003246 if (PyTuple_Check(subobj)) {
3247 Py_ssize_t i;
3248 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3249 result = _string_tailmatch(self,
3250 PyTuple_GET_ITEM(subobj, i),
3251 start, end, -1);
3252 if (result == -1)
3253 return NULL;
3254 else if (result) {
3255 Py_RETURN_TRUE;
3256 }
3257 }
3258 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003259 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003260 result = _string_tailmatch(self, subobj, start, end, -1);
3261 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003262 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003263 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003264 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003265}
3266
3267
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003268PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003269"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003270\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003271Return True if S ends with the specified suffix, False otherwise.\n\
3272With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003273With optional end, stop comparing S at that position.\n\
3274suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003275
3276static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003277string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003278{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003279 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003280 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003281 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003282 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003283
Guido van Rossumc6821402000-05-08 14:08:05 +00003284 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3285 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003286 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003287 if (PyTuple_Check(subobj)) {
3288 Py_ssize_t i;
3289 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3290 result = _string_tailmatch(self,
3291 PyTuple_GET_ITEM(subobj, i),
3292 start, end, +1);
3293 if (result == -1)
3294 return NULL;
3295 else if (result) {
3296 Py_RETURN_TRUE;
3297 }
3298 }
3299 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003300 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003301 result = _string_tailmatch(self, subobj, start, end, +1);
3302 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003303 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003304 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003305 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003306}
3307
3308
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003309PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003310"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003311\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003312Encodes S using the codec registered for encoding. encoding defaults\n\
3313to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003314handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003315a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3316'xmlcharrefreplace' as well as any other name registered with\n\
3317codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003318
3319static PyObject *
3320string_encode(PyStringObject *self, PyObject *args)
3321{
3322 char *encoding = NULL;
3323 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003324 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003325
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003326 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3327 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003328 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003329 if (v == NULL)
3330 goto onError;
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003331 if (!PyBytes_Check(v)) {
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003332 PyErr_Format(PyExc_TypeError,
Guido van Rossumf15a29f2007-05-04 00:41:39 +00003333 "[str8] encoder did not return a bytes object "
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003334 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003335 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003336 Py_DECREF(v);
3337 return NULL;
3338 }
3339 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003340
3341 onError:
3342 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003343}
3344
3345
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003346PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003347"S.decode([encoding[,errors]]) -> object\n\
3348\n\
3349Decodes S using the codec registered for encoding. encoding defaults\n\
3350to the default encoding. errors may be given to set a different error\n\
3351handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003352a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3353as well as any other name registerd with codecs.register_error that is\n\
3354able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003355
3356static PyObject *
3357string_decode(PyStringObject *self, PyObject *args)
3358{
3359 char *encoding = NULL;
3360 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003361 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003362
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003363 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3364 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003365 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003366 if (v == NULL)
3367 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003368 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3369 PyErr_Format(PyExc_TypeError,
3370 "decoder did not return a string/unicode object "
3371 "(type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003372 Py_Type(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003373 Py_DECREF(v);
3374 return NULL;
3375 }
3376 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003377
3378 onError:
3379 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003380}
3381
3382
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003383PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003384"S.expandtabs([tabsize]) -> string\n\
3385\n\
3386Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003387If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003388
3389static PyObject*
3390string_expandtabs(PyStringObject *self, PyObject *args)
3391{
3392 const char *e, *p;
3393 char *q;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003394 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395 PyObject *u;
3396 int tabsize = 8;
3397
3398 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3399 return NULL;
3400
Thomas Wouters7e474022000-07-16 12:04:32 +00003401 /* First pass: determine size of output string */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003402 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003403 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3404 for (p = PyString_AS_STRING(self); p < e; p++)
3405 if (*p == '\t') {
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003406 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003407 j += tabsize - (j % tabsize);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003408 if (old_j > j) {
3409 PyErr_SetString(PyExc_OverflowError,
3410 "new string is too long");
3411 return NULL;
3412 }
3413 old_j = j;
3414 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003415 }
3416 else {
3417 j++;
3418 if (*p == '\n' || *p == '\r') {
3419 i += j;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003420 old_j = j = 0;
3421 if (i < 0) {
3422 PyErr_SetString(PyExc_OverflowError,
3423 "new string is too long");
3424 return NULL;
3425 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003426 }
3427 }
3428
Guido van Rossumcd16bf62007-06-13 18:07:49 +00003429 if ((i + j) < 0) {
3430 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3431 return NULL;
3432 }
3433
Guido van Rossum4c08d552000-03-10 22:55:18 +00003434 /* Second pass: create output string and fill it */
3435 u = PyString_FromStringAndSize(NULL, i + j);
3436 if (!u)
3437 return NULL;
3438
3439 j = 0;
3440 q = PyString_AS_STRING(u);
3441
3442 for (p = PyString_AS_STRING(self); p < e; p++)
3443 if (*p == '\t') {
3444 if (tabsize > 0) {
3445 i = tabsize - (j % tabsize);
3446 j += i;
3447 while (i--)
3448 *q++ = ' ';
3449 }
3450 }
3451 else {
3452 j++;
3453 *q++ = *p;
3454 if (*p == '\n' || *p == '\r')
3455 j = 0;
3456 }
3457
3458 return u;
3459}
3460
Thomas Wouters477c8d52006-05-27 19:21:47 +00003461Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003462pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003463{
3464 PyObject *u;
3465
3466 if (left < 0)
3467 left = 0;
3468 if (right < 0)
3469 right = 0;
3470
Tim Peters8fa5dd02001-09-12 02:18:30 +00003471 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003472 Py_INCREF(self);
3473 return (PyObject *)self;
3474 }
3475
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003476 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003477 left + PyString_GET_SIZE(self) + right);
3478 if (u) {
3479 if (left)
3480 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003481 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003482 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003483 PyString_GET_SIZE(self));
3484 if (right)
3485 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3486 fill, right);
3487 }
3488
3489 return u;
3490}
3491
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003492PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003493"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003494"\n"
3495"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003496"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003497
3498static PyObject *
3499string_ljust(PyStringObject *self, PyObject *args)
3500{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003501 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003502 char fillchar = ' ';
3503
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003504 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003505 return NULL;
3506
Tim Peters8fa5dd02001-09-12 02:18:30 +00003507 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003508 Py_INCREF(self);
3509 return (PyObject*) self;
3510 }
3511
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003512 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003513}
3514
3515
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003516PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003517"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003518"\n"
3519"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003520"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003521
3522static PyObject *
3523string_rjust(PyStringObject *self, PyObject *args)
3524{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003525 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003526 char fillchar = ' ';
3527
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003528 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529 return NULL;
3530
Tim Peters8fa5dd02001-09-12 02:18:30 +00003531 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003532 Py_INCREF(self);
3533 return (PyObject*) self;
3534 }
3535
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003536 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537}
3538
3539
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003540PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003541"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003542"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003543"Return S centered in a string of length width. Padding is\n"
3544"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003545
3546static PyObject *
3547string_center(PyStringObject *self, PyObject *args)
3548{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003549 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003550 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003551 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003552
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003553 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003554 return NULL;
3555
Tim Peters8fa5dd02001-09-12 02:18:30 +00003556 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557 Py_INCREF(self);
3558 return (PyObject*) self;
3559 }
3560
3561 marg = width - PyString_GET_SIZE(self);
3562 left = marg / 2 + (marg & width & 1);
3563
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003564 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003565}
3566
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003567PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003568"S.zfill(width) -> string\n"
3569"\n"
3570"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003571"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003572
3573static PyObject *
3574string_zfill(PyStringObject *self, PyObject *args)
3575{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003576 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003577 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003578 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003579 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003580
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003581 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003582 return NULL;
3583
3584 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003585 if (PyString_CheckExact(self)) {
3586 Py_INCREF(self);
3587 return (PyObject*) self;
3588 }
3589 else
3590 return PyString_FromStringAndSize(
3591 PyString_AS_STRING(self),
3592 PyString_GET_SIZE(self)
3593 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003594 }
3595
3596 fill = width - PyString_GET_SIZE(self);
3597
3598 s = pad(self, fill, 0, '0');
3599
3600 if (s == NULL)
3601 return NULL;
3602
3603 p = PyString_AS_STRING(s);
3604 if (p[fill] == '+' || p[fill] == '-') {
3605 /* move sign to beginning of string */
3606 p[0] = p[fill];
3607 p[fill] = '0';
3608 }
3609
3610 return (PyObject*) s;
3611}
3612
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003613PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003614"S.isspace() -> bool\n\
3615\n\
3616Return True if all characters in S are whitespace\n\
3617and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618
3619static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003620string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621{
Fred Drakeba096332000-07-09 07:04:36 +00003622 register const unsigned char *p
3623 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003624 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625
Guido van Rossum4c08d552000-03-10 22:55:18 +00003626 /* Shortcut for single character strings */
3627 if (PyString_GET_SIZE(self) == 1 &&
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003628 ISSPACE(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003629 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003630
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003631 /* Special case for empty strings */
3632 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003633 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003634
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635 e = p + PyString_GET_SIZE(self);
3636 for (; p < e; p++) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003637 if (!ISSPACE(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003638 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003639 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003640 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641}
3642
3643
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003644PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003645"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003646\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003647Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003648and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003649
3650static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003651string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003652{
Fred Drakeba096332000-07-09 07:04:36 +00003653 register const unsigned char *p
3654 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003655 register const unsigned char *e;
3656
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003657 /* Shortcut for single character strings */
3658 if (PyString_GET_SIZE(self) == 1 &&
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003659 ISALPHA(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003661
3662 /* Special case for empty strings */
3663 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003664 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003665
3666 e = p + PyString_GET_SIZE(self);
3667 for (; p < e; p++) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003668 if (!ISALPHA(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003669 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003670 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003671 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003672}
3673
3674
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003675PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003677\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003678Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003679and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003680
3681static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003682string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003683{
Fred Drakeba096332000-07-09 07:04:36 +00003684 register const unsigned char *p
3685 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003686 register const unsigned char *e;
3687
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003688 /* Shortcut for single character strings */
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003689 if (PyString_GET_SIZE(self) == 1 && ISALNUM(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003690 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003691
3692 /* Special case for empty strings */
3693 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003694 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003695
3696 e = p + PyString_GET_SIZE(self);
3697 for (; p < e; p++) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003698 if (!ISALNUM(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003699 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003700 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003702}
3703
3704
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003705PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003706"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003708Return True if all characters in S are digits\n\
3709and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710
3711static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003712string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003713{
Fred Drakeba096332000-07-09 07:04:36 +00003714 register const unsigned char *p
3715 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003716 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003717
Guido van Rossum4c08d552000-03-10 22:55:18 +00003718 /* Shortcut for single character strings */
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003719 if (PyString_GET_SIZE(self) == 1 && ISDIGIT(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003721
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003722 /* Special case for empty strings */
3723 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003724 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003725
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726 e = p + PyString_GET_SIZE(self);
3727 for (; p < e; p++) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003728 if (!ISDIGIT(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003729 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003731 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003732}
3733
3734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003735PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003736"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003738Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003739at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003740
3741static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003742string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743{
Fred Drakeba096332000-07-09 07:04:36 +00003744 register const unsigned char *p
3745 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003746 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003747 int cased;
3748
Guido van Rossum4c08d552000-03-10 22:55:18 +00003749 /* Shortcut for single character strings */
3750 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003751 return PyBool_FromLong(ISLOWER(*p));
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003753 /* Special case for empty strings */
3754 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003755 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003756
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757 e = p + PyString_GET_SIZE(self);
3758 cased = 0;
3759 for (; p < e; p++) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003760 if (ISUPPER(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003761 return PyBool_FromLong(0);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003762 else if (!cased && ISLOWER(*p))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003763 cased = 1;
3764 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003765 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766}
3767
3768
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003769PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003770"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003771\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003772Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003773at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774
3775static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003776string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777{
Fred Drakeba096332000-07-09 07:04:36 +00003778 register const unsigned char *p
3779 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003780 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003781 int cased;
3782
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783 /* Shortcut for single character strings */
3784 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003785 return PyBool_FromLong(ISUPPER(*p));
Guido van Rossum4c08d552000-03-10 22:55:18 +00003786
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003787 /* Special case for empty strings */
3788 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003789 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003790
Guido van Rossum4c08d552000-03-10 22:55:18 +00003791 e = p + PyString_GET_SIZE(self);
3792 cased = 0;
3793 for (; p < e; p++) {
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003794 if (ISLOWER(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003795 return PyBool_FromLong(0);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003796 else if (!cased && ISUPPER(*p))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797 cased = 1;
3798 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003799 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800}
3801
3802
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003803PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003804"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003805\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003806Return True if S is a titlecased string and there is at least one\n\
3807character in S, i.e. uppercase characters may only follow uncased\n\
3808characters and lowercase characters only cased ones. Return False\n\
3809otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810
3811static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003812string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813{
Fred Drakeba096332000-07-09 07:04:36 +00003814 register const unsigned char *p
3815 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003816 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817 int cased, previous_is_cased;
3818
Guido van Rossum4c08d552000-03-10 22:55:18 +00003819 /* Shortcut for single character strings */
3820 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003821 return PyBool_FromLong(ISUPPER(*p));
Guido van Rossum4c08d552000-03-10 22:55:18 +00003822
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003823 /* Special case for empty strings */
3824 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003825 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003826
Guido van Rossum4c08d552000-03-10 22:55:18 +00003827 e = p + PyString_GET_SIZE(self);
3828 cased = 0;
3829 previous_is_cased = 0;
3830 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003831 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003832
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003833 if (ISUPPER(ch)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003834 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003835 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836 previous_is_cased = 1;
3837 cased = 1;
3838 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00003839 else if (ISLOWER(ch)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003840 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003841 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003842 previous_is_cased = 1;
3843 cased = 1;
3844 }
3845 else
3846 previous_is_cased = 0;
3847 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003848 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003849}
3850
3851
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003852PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003853"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003854\n\
3855Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003856Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003857is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003858
Guido van Rossum4c08d552000-03-10 22:55:18 +00003859static PyObject*
3860string_splitlines(PyStringObject *self, PyObject *args)
3861{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003862 register Py_ssize_t i;
3863 register Py_ssize_t j;
3864 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003865 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003866 PyObject *list;
3867 PyObject *str;
3868 char *data;
3869
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003870 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003871 return NULL;
3872
3873 data = PyString_AS_STRING(self);
3874 len = PyString_GET_SIZE(self);
3875
Thomas Wouters477c8d52006-05-27 19:21:47 +00003876 /* This does not use the preallocated list because splitlines is
3877 usually run with hundreds of newlines. The overhead of
3878 switching between PyList_SET_ITEM and append causes about a
3879 2-3% slowdown for that common case. A smarter implementation
3880 could move the if check out, so the SET_ITEMs are done first
3881 and the appends only done when the prealloc buffer is full.
3882 That's too much work for little gain.*/
3883
Guido van Rossum4c08d552000-03-10 22:55:18 +00003884 list = PyList_New(0);
3885 if (!list)
3886 goto onError;
3887
3888 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003889 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003890
Guido van Rossum4c08d552000-03-10 22:55:18 +00003891 /* Find a line and append it */
3892 while (i < len && data[i] != '\n' && data[i] != '\r')
3893 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003894
3895 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003896 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003897 if (i < len) {
3898 if (data[i] == '\r' && i + 1 < len &&
3899 data[i+1] == '\n')
3900 i += 2;
3901 else
3902 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003903 if (keepends)
3904 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003905 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003906 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003907 j = i;
3908 }
3909 if (j < len) {
3910 SPLIT_APPEND(data, j, len);
3911 }
3912
3913 return list;
3914
3915 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003916 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003917 return NULL;
3918}
3919
3920#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003921#undef SPLIT_ADD
3922#undef MAX_PREALLOC
3923#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003924
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003925static PyObject *
3926string_getnewargs(PyStringObject *v)
3927{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003928 return Py_BuildValue("(s#)", v->ob_sval, Py_Size(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003929}
3930
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003931
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003932static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003933string_methods[] = {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003934 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3935 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003936 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003937 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3938 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003939 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3940 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3941 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3942 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3943 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3944 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3945 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003946 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3947 capitalize__doc__},
3948 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3949 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3950 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003951 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003952 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3953 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3954 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3955 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3956 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3957 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3958 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003959 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3960 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003961 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3962 startswith__doc__},
3963 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3964 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3965 swapcase__doc__},
3966 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3967 translate__doc__},
3968 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3969 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3970 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3971 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3972 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3973 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3974 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3975 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3976 expandtabs__doc__},
3977 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3978 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003979 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003980 {NULL, NULL} /* sentinel */
3981};
3982
Jeremy Hylton938ace62002-07-17 16:30:39 +00003983static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003984str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3985
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003986static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003987string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003988{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003989 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003990 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003991
Guido van Rossumae960af2001-08-30 03:11:59 +00003992 if (type != &PyString_Type)
3993 return str_subtype_new(type, args, kwds);
Guido van Rossum3d1d7122007-06-07 17:54:36 +00003994 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str8", kwlist, &x))
Tim Peters6d6c1a32001-08-02 04:15:00 +00003995 return NULL;
3996 if (x == NULL)
3997 return PyString_FromString("");
3998 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003999}
4000
Guido van Rossumae960af2001-08-30 03:11:59 +00004001static PyObject *
4002str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4003{
Tim Petersaf90b3e2001-09-12 05:18:58 +00004004 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004005 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00004006
4007 assert(PyType_IsSubtype(type, &PyString_Type));
4008 tmp = string_new(&PyString_Type, args, kwds);
4009 if (tmp == NULL)
4010 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00004011 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00004012 n = PyString_GET_SIZE(tmp);
4013 pnew = type->tp_alloc(type, n);
4014 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004015 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004016 ((PyStringObject *)pnew)->ob_shash =
4017 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004018 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004019 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004020 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004021 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004022}
4023
Guido van Rossumcacfc072002-05-24 19:01:59 +00004024static PyObject *
4025basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4026{
4027 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004028 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004029 return NULL;
4030}
4031
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004032static PyObject *
4033string_mod(PyObject *v, PyObject *w)
4034{
4035 if (!PyString_Check(v)) {
4036 Py_INCREF(Py_NotImplemented);
4037 return Py_NotImplemented;
4038 }
4039 return PyString_Format(v, w);
4040}
4041
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004042PyDoc_STRVAR(basestring_doc,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00004043"Type basestring cannot be instantiated; it is the base for str8 and str.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004044
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004045static PyNumberMethods string_as_number = {
4046 0, /*nb_add*/
4047 0, /*nb_subtract*/
4048 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004049 string_mod, /*nb_remainder*/
4050};
4051
4052
Guido van Rossumcacfc072002-05-24 19:01:59 +00004053PyTypeObject PyBaseString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004054 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004055 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004056 0,
4057 0,
4058 0, /* tp_dealloc */
4059 0, /* tp_print */
4060 0, /* tp_getattr */
4061 0, /* tp_setattr */
4062 0, /* tp_compare */
4063 0, /* tp_repr */
4064 0, /* tp_as_number */
4065 0, /* tp_as_sequence */
4066 0, /* tp_as_mapping */
4067 0, /* tp_hash */
4068 0, /* tp_call */
4069 0, /* tp_str */
4070 0, /* tp_getattro */
4071 0, /* tp_setattro */
4072 0, /* tp_as_buffer */
4073 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4074 basestring_doc, /* tp_doc */
4075 0, /* tp_traverse */
4076 0, /* tp_clear */
4077 0, /* tp_richcompare */
4078 0, /* tp_weaklistoffset */
4079 0, /* tp_iter */
4080 0, /* tp_iternext */
4081 0, /* tp_methods */
4082 0, /* tp_members */
4083 0, /* tp_getset */
4084 &PyBaseObject_Type, /* tp_base */
4085 0, /* tp_dict */
4086 0, /* tp_descr_get */
4087 0, /* tp_descr_set */
4088 0, /* tp_dictoffset */
4089 0, /* tp_init */
4090 0, /* tp_alloc */
4091 basestring_new, /* tp_new */
4092 0, /* tp_free */
4093};
4094
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004095PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004096"str(object) -> string\n\
4097\n\
4098Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004099If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004100
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004101static PyObject *str_iter(PyObject *seq);
4102
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004103PyTypeObject PyString_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004104 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Walter Dörwald5d7a7002007-05-03 20:49:27 +00004105 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004106 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004107 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004108 string_dealloc, /* tp_dealloc */
Guido van Rossum04dbf3b2007-08-07 19:51:00 +00004109 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004110 0, /* tp_getattr */
4111 0, /* tp_setattr */
4112 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004113 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004114 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004115 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004116 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004117 (hashfunc)string_hash, /* tp_hash */
4118 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004119 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004120 PyObject_GenericGetAttr, /* tp_getattro */
4121 0, /* tp_setattro */
4122 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00004123 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
4124 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004125 string_doc, /* tp_doc */
4126 0, /* tp_traverse */
4127 0, /* tp_clear */
4128 (richcmpfunc)string_richcompare, /* tp_richcompare */
4129 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004130 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004131 0, /* tp_iternext */
4132 string_methods, /* tp_methods */
4133 0, /* tp_members */
4134 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004135 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004136 0, /* tp_dict */
4137 0, /* tp_descr_get */
4138 0, /* tp_descr_set */
4139 0, /* tp_dictoffset */
4140 0, /* tp_init */
4141 0, /* tp_alloc */
4142 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004143 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004144};
4145
4146void
Fred Drakeba096332000-07-09 07:04:36 +00004147PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004148{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004149 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004150 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004151 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004152 if (w == NULL || !PyString_Check(*pv)) {
4153 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004154 *pv = NULL;
4155 return;
4156 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004157 v = string_concat((PyStringObject *) *pv, w);
4158 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004159 *pv = v;
4160}
4161
Guido van Rossum013142a1994-08-30 08:19:36 +00004162void
Fred Drakeba096332000-07-09 07:04:36 +00004163PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004164{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004165 PyString_Concat(pv, w);
4166 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004167}
4168
4169
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004170/* The following function breaks the notion that strings are immutable:
4171 it changes the size of a string. We get away with this only if there
4172 is only one module referencing the object. You can also think of it
4173 as creating a new string object and destroying the old one, only
4174 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004175 already be known to some other part of the code...
4176 Note that if there's not enough memory to resize the string, the original
4177 string object at *pv is deallocated, *pv is set to NULL, an "out of
4178 memory" exception is set, and -1 is returned. Else (on success) 0 is
4179 returned, and the value in *pv may or may not be the same as on input.
4180 As always, an extra byte is allocated for a trailing \0 byte (newsize
4181 does *not* include that), and a trailing \0 byte is stored.
4182*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004183
4184int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004185_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004186{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004187 register PyObject *v;
4188 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004189 v = *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004190 if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004191 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004192 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004193 Py_DECREF(v);
4194 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004195 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004196 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004197 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004198 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004199 _Py_ForgetReference(v);
4200 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004201 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004202 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004203 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004204 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004205 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004206 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004207 _Py_NewReference(*pv);
4208 sv = (PyStringObject *) *pv;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004209 Py_Size(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004210 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004211 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004212 return 0;
4213}
Guido van Rossume5372401993-03-16 12:15:04 +00004214
4215/* Helpers for formatstring */
4216
Thomas Wouters477c8d52006-05-27 19:21:47 +00004217Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004218getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004219{
Thomas Wouters977485d2006-02-16 15:59:12 +00004220 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004221 if (argidx < arglen) {
4222 (*p_argidx)++;
4223 if (arglen < 0)
4224 return args;
4225 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004226 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004227 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004228 PyErr_SetString(PyExc_TypeError,
4229 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004230 return NULL;
4231}
4232
/* Flag bits collected while parsing a conversion specifier.  Each bit
 * stands for one flag character that may follow the '%'.
 */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
4245
Thomas Wouters477c8d52006-05-27 19:21:47 +00004246Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004247formatfloat(char *buf, size_t buflen, int flags,
4248 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004249{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004250 /* fmt = '%#.' + `prec` + `type`
4251 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004252 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004253 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004254 x = PyFloat_AsDouble(v);
4255 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004256 PyErr_Format(PyExc_TypeError, "float argument required, "
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004257 "not %.200s", Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004258 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004259 }
Guido van Rossume5372401993-03-16 12:15:04 +00004260 if (prec < 0)
4261 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004262 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4263 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004264 /* Worst case length calc to ensure no buffer overrun:
4265
4266 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004267 fmt = %#.<prec>g
4268 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004269 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004270 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004271
4272 'f' formats:
4273 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4274 len = 1 + 50 + 1 + prec = 52 + prec
4275
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004276 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004277 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004278
4279 */
Guido van Rossumb5a755e2007-07-18 18:15:48 +00004280 if (((type == 'g' || type == 'G') &&
4281 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004282 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004283 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004284 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004285 return -1;
4286 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004287 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4288 (flags&F_ALT) ? "#" : "",
4289 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004290 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004291 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004292}
4293
Tim Peters38fd5b62000-09-21 05:43:11 +00004294/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4295 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4296 * Python's regular ints.
4297 * Return value: a new PyString*, or NULL if error.
4298 * . *pbuf is set to point into it,
4299 * *plen set to the # of chars following that.
4300 * Caller must decref it when done using pbuf.
4301 * The string starting at *pbuf is of the form
4302 * "-"? ("0x" | "0X")? digit+
4303 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004304 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004305 * There will be at least prec digits, zero-filled on the left if
4306 * necessary to get that many.
4307 * val object to be converted
4308 * flags bitmask of format flags; only F_ALT is looked at
4309 * prec minimum number of digits; 0-fill on left if needed
4310 * type a character in [duoxX]; u acts the same as d
4311 *
4312 * CAUTION: o, x and X conversions on regular ints can never
4313 * produce a '-' sign, but can for Python's unbounded ints.
4314 */
4315PyObject*
4316_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4317 char **pbuf, int *plen)
4318{
4319 PyObject *result = NULL;
4320 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004321 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004322 int sign; /* 1 if '-', else 0 */
4323 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004324 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004325 int numdigits; /* len == numnondigits + numdigits */
4326 int numnondigits = 0;
4327
Guido van Rossumddefaf32007-01-14 03:31:43 +00004328 /* Avoid exceeding SSIZE_T_MAX */
4329 if (prec > PY_SSIZE_T_MAX-3) {
4330 PyErr_SetString(PyExc_OverflowError,
4331 "precision too large");
4332 return NULL;
4333 }
4334
Tim Peters38fd5b62000-09-21 05:43:11 +00004335 switch (type) {
4336 case 'd':
4337 case 'u':
Martin v. Löwisff398c62007-08-14 21:57:32 +00004338 /* Special-case boolean: we want 0/1 */
4339 if (PyBool_Check(val))
4340 result = PyNumber_ToBase(val, 10);
4341 else
4342 result = Py_Type(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004343 break;
4344 case 'o':
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004345 numnondigits = 2;
4346 result = PyNumber_ToBase(val, 8);
Tim Peters38fd5b62000-09-21 05:43:11 +00004347 break;
4348 case 'x':
4349 case 'X':
4350 numnondigits = 2;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004351 result = PyNumber_ToBase(val, 16);
Tim Peters38fd5b62000-09-21 05:43:11 +00004352 break;
4353 default:
4354 assert(!"'type' not in [duoxX]");
4355 }
4356 if (!result)
4357 return NULL;
4358
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004359 buf = PyString_AsString(result);
4360 if (!buf) {
4361 Py_DECREF(result);
4362 return NULL;
4363 }
4364
Tim Peters38fd5b62000-09-21 05:43:11 +00004365 /* To modify the string in-place, there can only be one reference. */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004366 if (Py_Refcnt(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004367 PyErr_BadInternalCall();
4368 return NULL;
4369 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004370 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004371 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004372 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4373 return NULL;
4374 }
4375 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004376 if (buf[len-1] == 'L') {
4377 --len;
4378 buf[len] = '\0';
4379 }
4380 sign = buf[0] == '-';
4381 numnondigits += sign;
4382 numdigits = len - numnondigits;
4383 assert(numdigits > 0);
4384
Tim Petersfff53252001-04-12 18:38:48 +00004385 /* Get rid of base marker unless F_ALT */
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004386 if (((flags & F_ALT) == 0 &&
4387 (type == 'o' || type == 'x' || type == 'X'))) {
4388 assert(buf[sign] == '0');
4389 assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
4390 buf[sign+1] == 'o');
4391 numnondigits -= 2;
4392 buf += 2;
4393 len -= 2;
4394 if (sign)
4395 buf[0] = '-';
Tim Peters38fd5b62000-09-21 05:43:11 +00004396 assert(len == numnondigits + numdigits);
4397 assert(numdigits > 0);
4398 }
4399
4400 /* Fill with leading zeroes to meet minimum width. */
4401 if (prec > numdigits) {
4402 PyObject *r1 = PyString_FromStringAndSize(NULL,
4403 numnondigits + prec);
4404 char *b1;
4405 if (!r1) {
4406 Py_DECREF(result);
4407 return NULL;
4408 }
4409 b1 = PyString_AS_STRING(r1);
4410 for (i = 0; i < numnondigits; ++i)
4411 *b1++ = *buf++;
4412 for (i = 0; i < prec - numdigits; i++)
4413 *b1++ = '0';
4414 for (i = 0; i < numdigits; i++)
4415 *b1++ = *buf++;
4416 *b1 = '\0';
4417 Py_DECREF(result);
4418 result = r1;
4419 buf = PyString_AS_STRING(result);
4420 len = numnondigits + prec;
4421 }
4422
4423 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004424 if (type == 'X') {
4425 /* Need to convert all lower case letters to upper case.
4426 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004427 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004428 if (buf[i] >= 'a' && buf[i] <= 'x')
4429 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004430 }
4431 *pbuf = buf;
4432 *plen = len;
4433 return result;
4434}
4435
Thomas Wouters477c8d52006-05-27 19:21:47 +00004436Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004437formatint(char *buf, size_t buflen, int flags,
4438 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004439{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004440 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004441 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4442 + 1 + 1 = 24 */
4443 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004444 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004445 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004446
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004447 x = PyInt_AsLong(v);
4448 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004449 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004450 Py_Type(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004451 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004452 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004453 if (x < 0 && type == 'u') {
4454 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004455 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004456 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4457 sign = "-";
4458 else
4459 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004460 if (prec < 0)
4461 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004462
4463 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004464 (type == 'x' || type == 'X' || type == 'o')) {
4465 /* When converting under %#o, %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004466 * of issues that cause pain:
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004467 * - for %#o, we want a different base marker than C
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004468 * - when 0 is being converted, the C standard leaves off
4469 * the '0x' or '0X', which is inconsistent with other
4470 * %#x/%#X conversions and inconsistent with Python's
4471 * hex() function
4472 * - there are platforms that violate the standard and
4473 * convert 0 with the '0x' or '0X'
4474 * (Metrowerks, Compaq Tru64)
4475 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004476 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004477 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004478 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004479 * We can achieve the desired consistency by inserting our
4480 * own '0x' or '0X' prefix, and substituting %x/%X in place
4481 * of %#x/%#X.
4482 *
4483 * Note that this is the same approach as used in
4484 * formatint() in unicodeobject.c
4485 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004486 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4487 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004488 }
4489 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004490 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4491 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004492 prec, type);
4493 }
4494
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004495 /* buf = '+'/'-'/'' + '0o'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004496 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004497 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004498 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004499 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004500 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004501 return -1;
4502 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004503 if (sign[0])
4504 PyOS_snprintf(buf, buflen, fmt, -x);
4505 else
4506 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004507 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004508}
4509
Thomas Wouters477c8d52006-05-27 19:21:47 +00004510Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004511formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004512{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004513 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004514 if (PyString_Check(v)) {
4515 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004516 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004517 }
4518 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004519 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004520 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004521 }
4522 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004523 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004524}
4525
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every
   formatting routine checks bounds so the buffer cannot overflow, but
   allocating a buffer of the right size for each format might be a
   better solution.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004535
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004536PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004537PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004538{
4539 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004540 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004541 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004542 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004543 PyObject *result, *orig_args;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004544 PyObject *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004545 PyObject *dict = NULL;
4546 if (format == NULL || !PyString_Check(format) || args == NULL) {
4547 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004548 return NULL;
4549 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004550 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004551 fmt = PyString_AS_STRING(format);
4552 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004553 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004554 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004555 if (result == NULL)
4556 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004557 res = PyString_AsString(result);
4558 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004559 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004560 argidx = 0;
4561 }
4562 else {
4563 arglen = -1;
4564 argidx = -2;
4565 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004566 if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004567 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004568 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004569 while (--fmtcnt >= 0) {
4570 if (*fmt != '%') {
4571 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004572 rescnt = fmtcnt + 100;
4573 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004574 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004575 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004576 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004577 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004578 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004579 }
4580 *res++ = *fmt++;
4581 }
4582 else {
4583 /* Got a format specifier */
4584 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004585 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004586 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004587 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004588 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004589 PyObject *v = NULL;
4590 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004591 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004592 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004593 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004594 char formatbuf[FORMATBUFLEN];
4595 /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00004596 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004597 Py_ssize_t argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004598
Guido van Rossumda9c2711996-12-05 21:58:58 +00004599 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004600 if (*fmt == '(') {
4601 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004602 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004603 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004604 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004605
4606 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004607 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004608 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004609 goto error;
4610 }
4611 ++fmt;
4612 --fmtcnt;
4613 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004614 /* Skip over balanced parentheses */
4615 while (pcount > 0 && --fmtcnt >= 0) {
4616 if (*fmt == ')')
4617 --pcount;
4618 else if (*fmt == '(')
4619 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004620 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004621 }
4622 keylen = fmt - keystart - 1;
4623 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004624 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004625 "incomplete format key");
4626 goto error;
4627 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004628 key = PyString_FromStringAndSize(keystart,
4629 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004630 if (key == NULL)
4631 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004632 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004633 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004634 args_owned = 0;
4635 }
4636 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004637 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004638 if (args == NULL) {
4639 goto error;
4640 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004641 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004642 arglen = -1;
4643 argidx = -2;
4644 }
Guido van Rossume5372401993-03-16 12:15:04 +00004645 while (--fmtcnt >= 0) {
4646 switch (c = *fmt++) {
4647 case '-': flags |= F_LJUST; continue;
4648 case '+': flags |= F_SIGN; continue;
4649 case ' ': flags |= F_BLANK; continue;
4650 case '#': flags |= F_ALT; continue;
4651 case '0': flags |= F_ZERO; continue;
4652 }
4653 break;
4654 }
4655 if (c == '*') {
4656 v = getnextarg(args, arglen, &argidx);
4657 if (v == NULL)
4658 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004659 if (!PyInt_Check(v)) {
4660 PyErr_SetString(PyExc_TypeError,
4661 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004662 goto error;
4663 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004664 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004665 if (width == -1 && PyErr_Occurred())
4666 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004667 if (width < 0) {
4668 flags |= F_LJUST;
4669 width = -width;
4670 }
Guido van Rossume5372401993-03-16 12:15:04 +00004671 if (--fmtcnt >= 0)
4672 c = *fmt++;
4673 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00004674 else if (c >= 0 && ISDIGIT(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004675 width = c - '0';
4676 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004677 c = Py_CHARMASK(*fmt++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00004678 if (!ISDIGIT(c))
Guido van Rossume5372401993-03-16 12:15:04 +00004679 break;
4680 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004681 PyErr_SetString(
4682 PyExc_ValueError,
4683 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004684 goto error;
4685 }
4686 width = width*10 + (c - '0');
4687 }
4688 }
4689 if (c == '.') {
4690 prec = 0;
4691 if (--fmtcnt >= 0)
4692 c = *fmt++;
4693 if (c == '*') {
4694 v = getnextarg(args, arglen, &argidx);
4695 if (v == NULL)
4696 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004697 if (!PyInt_Check(v)) {
4698 PyErr_SetString(
4699 PyExc_TypeError,
4700 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004701 goto error;
4702 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004703 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004704 if (prec == -1 && PyErr_Occurred())
4705 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004706 if (prec < 0)
4707 prec = 0;
4708 if (--fmtcnt >= 0)
4709 c = *fmt++;
4710 }
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00004711 else if (c >= 0 && ISDIGIT(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004712 prec = c - '0';
4713 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004714 c = Py_CHARMASK(*fmt++);
Guido van Rossum6ccd3f22007-10-09 03:46:30 +00004715 if (!ISDIGIT(c))
Guido van Rossume5372401993-03-16 12:15:04 +00004716 break;
4717 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004718 PyErr_SetString(
4719 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004720 "prec too big");
4721 goto error;
4722 }
4723 prec = prec*10 + (c - '0');
4724 }
4725 }
4726 } /* prec */
4727 if (fmtcnt >= 0) {
4728 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004729 if (--fmtcnt >= 0)
4730 c = *fmt++;
4731 }
4732 }
4733 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004734 PyErr_SetString(PyExc_ValueError,
4735 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004736 goto error;
4737 }
4738 if (c != '%') {
4739 v = getnextarg(args, arglen, &argidx);
4740 if (v == NULL)
4741 goto error;
4742 }
4743 sign = 0;
4744 fill = ' ';
4745 switch (c) {
4746 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004747 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004748 len = 1;
4749 break;
4750 case 's':
Neil Schemenauerab619232005-08-31 23:02:05 +00004751 if (PyUnicode_Check(v)) {
4752 fmt = fmt_start;
4753 argidx = argidx_start;
4754 goto unicode;
4755 }
Neil Schemenauercf52c072005-08-12 17:34:58 +00004756 temp = _PyObject_Str(v);
4757 if (temp != NULL && PyUnicode_Check(temp)) {
4758 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004759 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004760 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004761 goto unicode;
4762 }
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004763 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004764 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004765 if (c == 'r')
Walter Dörwald1ab83302007-05-18 17:15:44 +00004766 temp = PyObject_ReprStr8(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004767 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004768 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004769 if (!PyString_Check(temp)) {
4770 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3d1d7122007-06-07 17:54:36 +00004771 "%s argument has non-string str()/repr()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004772 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004773 goto error;
4774 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004775 pbuf = PyString_AS_STRING(temp);
4776 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004777 if (prec >= 0 && len > prec)
4778 len = prec;
4779 break;
4780 case 'i':
4781 case 'd':
4782 case 'u':
4783 case 'o':
4784 case 'x':
4785 case 'X':
4786 if (c == 'i')
4787 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004788 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004789 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004790 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004791 prec, c, &pbuf, &ilen);
4792 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004793 if (!temp)
4794 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004795 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004796 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004797 else {
4798 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004799 len = formatint(pbuf,
4800 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004801 flags, prec, c, v);
4802 if (len < 0)
4803 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004804 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004805 }
4806 if (flags & F_ZERO)
4807 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004808 break;
4809 case 'e':
4810 case 'E':
4811 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004812 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004813 case 'g':
4814 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004815 if (c == 'F')
4816 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004817 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004818 len = formatfloat(pbuf, sizeof(formatbuf),
4819 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004820 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004821 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004822 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004823 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004824 fill = '0';
4825 break;
4826 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004827 if (PyUnicode_Check(v)) {
4828 fmt = fmt_start;
4829 argidx = argidx_start;
4830 goto unicode;
4831 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004832 pbuf = formatbuf;
4833 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004834 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004835 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004836 break;
4837 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004838 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004839 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004840 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004841 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004842 (Py_ssize_t)(fmt - 1 -
4843 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004844 goto error;
4845 }
4846 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004847 if (*pbuf == '-' || *pbuf == '+') {
4848 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004849 len--;
4850 }
4851 else if (flags & F_SIGN)
4852 sign = '+';
4853 else if (flags & F_BLANK)
4854 sign = ' ';
4855 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004856 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004857 }
4858 if (width < len)
4859 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004860 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004861 reslen -= rescnt;
4862 rescnt = width + fmtcnt + 100;
4863 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004864 if (reslen < 0) {
4865 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004866 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004867 return PyErr_NoMemory();
4868 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004869 if (_PyString_Resize(&result, reslen) < 0) {
4870 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004871 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004872 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004873 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004874 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004875 }
4876 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004877 if (fill != ' ')
4878 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004879 rescnt--;
4880 if (width > len)
4881 width--;
4882 }
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004883 if ((flags & F_ALT) &&
4884 (c == 'x' || c == 'X' || c == 'o')) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004885 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004886 assert(pbuf[1] == c);
4887 if (fill != ' ') {
4888 *res++ = *pbuf++;
4889 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004890 }
Tim Petersfff53252001-04-12 18:38:48 +00004891 rescnt -= 2;
4892 width -= 2;
4893 if (width < 0)
4894 width = 0;
4895 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004896 }
4897 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004898 do {
4899 --rescnt;
4900 *res++ = fill;
4901 } while (--width > len);
4902 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004903 if (fill == ' ') {
4904 if (sign)
4905 *res++ = sign;
4906 if ((flags & F_ALT) &&
Guido van Rossumcd16bf62007-06-13 18:07:49 +00004907 (c == 'x' || c == 'X' || c == 'o')) {
Tim Petersfff53252001-04-12 18:38:48 +00004908 assert(pbuf[0] == '0');
4909 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004910 *res++ = *pbuf++;
4911 *res++ = *pbuf++;
4912 }
4913 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004914 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004915 res += len;
4916 rescnt -= len;
4917 while (--width >= len) {
4918 --rescnt;
4919 *res++ = ' ';
4920 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004921 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004922 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004923 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004924 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004925 goto error;
4926 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004927 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004928 } /* '%' */
4929 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004930 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004931 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004932 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004933 goto error;
4934 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004935 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004936 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004937 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004938 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004939 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004940
4941 unicode:
4942 if (args_owned) {
4943 Py_DECREF(args);
4944 args_owned = 0;
4945 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004946 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004947 if (PyTuple_Check(orig_args) && argidx > 0) {
4948 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004949 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004950 v = PyTuple_New(n);
4951 if (v == NULL)
4952 goto error;
4953 while (--n >= 0) {
4954 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4955 Py_INCREF(w);
4956 PyTuple_SET_ITEM(v, n, w);
4957 }
4958 args = v;
4959 } else {
4960 Py_INCREF(orig_args);
4961 args = orig_args;
4962 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004963 args_owned = 1;
4964 /* Take what we have of the result and let the Unicode formatting
4965 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004966 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004967 if (_PyString_Resize(&result, rescnt))
4968 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004969 fmtcnt = PyString_GET_SIZE(format) - \
4970 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004971 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4972 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004973 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004974 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004975 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004976 if (v == NULL)
4977 goto error;
4978 /* Paste what we have (result) to what the Unicode formatting
4979 function returned (v) and return the result (or error) */
4980 w = PyUnicode_Concat(result, v);
4981 Py_DECREF(result);
4982 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004983 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004984 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004985
Guido van Rossume5372401993-03-16 12:15:04 +00004986 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004987 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004988 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004989 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004990 }
Guido van Rossume5372401993-03-16 12:15:04 +00004991 return NULL;
4992}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004993
Guido van Rossum2a61e741997-01-18 07:55:05 +00004994void
Fred Drakeba096332000-07-09 07:04:36 +00004995PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004996{
4997 register PyStringObject *s = (PyStringObject *)(*p);
4998 PyObject *t;
4999 if (s == NULL || !PyString_Check(s))
5000 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005001 /* If it's a string subclass, we don't really know what putting
5002 it in the interned dict might do. */
5003 if (!PyString_CheckExact(s))
5004 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005005 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00005006 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005007 if (interned == NULL) {
5008 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005009 if (interned == NULL) {
5010 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005011 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005012 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005013 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005014 t = PyDict_GetItem(interned, (PyObject *)s);
5015 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005016 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005017 Py_DECREF(*p);
5018 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005019 return;
5020 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005021
Armin Rigo79f7ad22004-08-07 19:27:39 +00005022 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005023 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005024 return;
5025 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005026 /* The two references in interned are not counted by refcnt.
5027 The string deallocator will take care of this */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00005028 Py_Refcnt(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005029 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005030}
5031
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005032void
5033PyString_InternImmortal(PyObject **p)
5034{
5035 PyString_InternInPlace(p);
5036 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5037 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5038 Py_INCREF(*p);
5039 }
5040}
5041
Guido van Rossum2a61e741997-01-18 07:55:05 +00005042
5043PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005044PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005045{
5046 PyObject *s = PyString_FromString(cp);
5047 if (s == NULL)
5048 return NULL;
5049 PyString_InternInPlace(&s);
5050 return s;
5051}
5052
Guido van Rossum8cf04761997-08-02 02:57:45 +00005053void
Fred Drakeba096332000-07-09 07:04:36 +00005054PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005055{
5056 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005057 for (i = 0; i < UCHAR_MAX + 1; i++) {
5058 Py_XDECREF(characters[i]);
5059 characters[i] = NULL;
5060 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005061 Py_XDECREF(nullstring);
5062 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005063}
Barry Warsawa903ad982001-02-23 16:40:48 +00005064
Barry Warsawa903ad982001-02-23 16:40:48 +00005065void _Py_ReleaseInternedStrings(void)
5066{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005067 PyObject *keys;
5068 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005069 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00005070 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005071
5072 if (interned == NULL || !PyDict_Check(interned))
5073 return;
5074 keys = PyDict_Keys(interned);
5075 if (keys == NULL || !PyList_Check(keys)) {
5076 PyErr_Clear();
5077 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005078 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005079
5080 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5081 detector, interned strings are not forcibly deallocated; rather, we
5082 give them their stolen references back, and then clear and DECREF
5083 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00005084
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005085 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00005086 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5087 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005088 for (i = 0; i < n; i++) {
5089 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5090 switch (s->ob_sstate) {
5091 case SSTATE_NOT_INTERNED:
5092 /* XXX Shouldn't happen */
5093 break;
5094 case SSTATE_INTERNED_IMMORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00005095 Py_Refcnt(s) += 1;
5096 immortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005097 break;
5098 case SSTATE_INTERNED_MORTAL:
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00005099 Py_Refcnt(s) += 2;
5100 mortal_size += Py_Size(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005101 break;
5102 default:
5103 Py_FatalError("Inconsistent interned string state.");
5104 }
5105 s->ob_sstate = SSTATE_NOT_INTERNED;
5106 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00005107 fprintf(stderr, "total size of all interned strings: "
5108 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5109 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005110 Py_DECREF(keys);
5111 PyDict_Clear(interned);
5112 Py_DECREF(interned);
5113 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005114}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005115
5116
5117/*********************** Str Iterator ****************************/
5118
5119typedef struct {
5120 PyObject_HEAD
Guido van Rossum49d6b072006-08-17 21:11:47 +00005121 Py_ssize_t it_index;
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005122 PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
5123} striterobject;
5124
5125static void
5126striter_dealloc(striterobject *it)
5127{
5128 _PyObject_GC_UNTRACK(it);
5129 Py_XDECREF(it->it_seq);
5130 PyObject_GC_Del(it);
5131}
5132
5133static int
5134striter_traverse(striterobject *it, visitproc visit, void *arg)
5135{
5136 Py_VISIT(it->it_seq);
5137 return 0;
5138}
5139
5140static PyObject *
5141striter_next(striterobject *it)
5142{
5143 PyStringObject *seq;
5144 PyObject *item;
5145
5146 assert(it != NULL);
5147 seq = it->it_seq;
5148 if (seq == NULL)
5149 return NULL;
5150 assert(PyString_Check(seq));
5151
5152 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005153 item = PyString_FromStringAndSize(
5154 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005155 if (item != NULL)
5156 ++it->it_index;
5157 return item;
5158 }
5159
5160 Py_DECREF(seq);
5161 it->it_seq = NULL;
5162 return NULL;
5163}
5164
5165static PyObject *
5166striter_len(striterobject *it)
5167{
5168 Py_ssize_t len = 0;
5169 if (it->it_seq)
5170 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
5171 return PyInt_FromSsize_t(len);
5172}
5173
Guido van Rossum49d6b072006-08-17 21:11:47 +00005174PyDoc_STRVAR(length_hint_doc,
5175 "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005176
5177static PyMethodDef striter_methods[] = {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005178 {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
5179 length_hint_doc},
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005180 {NULL, NULL} /* sentinel */
5181};
5182
5183PyTypeObject PyStringIter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00005184 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum49d6b072006-08-17 21:11:47 +00005185 "striterator", /* tp_name */
5186 sizeof(striterobject), /* tp_basicsize */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005187 0, /* tp_itemsize */
5188 /* methods */
5189 (destructor)striter_dealloc, /* tp_dealloc */
5190 0, /* tp_print */
5191 0, /* tp_getattr */
5192 0, /* tp_setattr */
5193 0, /* tp_compare */
5194 0, /* tp_repr */
5195 0, /* tp_as_number */
5196 0, /* tp_as_sequence */
5197 0, /* tp_as_mapping */
5198 0, /* tp_hash */
5199 0, /* tp_call */
5200 0, /* tp_str */
5201 PyObject_GenericGetAttr, /* tp_getattro */
5202 0, /* tp_setattro */
5203 0, /* tp_as_buffer */
5204 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
5205 0, /* tp_doc */
5206 (traverseproc)striter_traverse, /* tp_traverse */
5207 0, /* tp_clear */
5208 0, /* tp_richcompare */
5209 0, /* tp_weaklistoffset */
5210 PyObject_SelfIter, /* tp_iter */
5211 (iternextfunc)striter_next, /* tp_iternext */
5212 striter_methods, /* tp_methods */
5213 0,
5214};
5215
5216static PyObject *
5217str_iter(PyObject *seq)
5218{
5219 striterobject *it;
5220
5221 if (!PyString_Check(seq)) {
5222 PyErr_BadInternalCall();
5223 return NULL;
5224 }
5225 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5226 if (it == NULL)
5227 return NULL;
5228 it->it_index = 0;
5229 Py_INCREF(seq);
5230 it->it_seq = (PyStringObject *)seq;
5231 _PyObject_GC_TRACK(it);
5232 return (PyObject *)it;
5233}