blob: f37f000cdc2ce9eee658e5cd549e422dc5dafe8f [file] [log] [blame]
Guido van Rossume270b431992-09-03 20:21:07 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossume270b431992-09-03 20:21:07 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossume270b431992-09-03 20:21:07 +00009******************************************************************/
10
11/* strop module */
12
/* Module-level docstring. */
static char strop_module__doc__[] =
	"Common string manipulations, optimized for speed.\n"
	"\n"
	"Always use \"import string\" rather than referencing\n"
	"this module directly.";
Guido van Rossum983c9301997-12-29 19:52:29 +000018
Barry Warsawf5256011996-12-09 18:35:56 +000019#include "Python.h"
Guido van Rossume270b431992-09-03 20:21:07 +000020
Guido van Rossum7b7c5781997-03-14 04:13:56 +000021#ifdef HAVE_LIMITS_H
22#include <limits.h>
23#else
24#define INT_MAX 2147483647
25#endif
26
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000027#include <ctype.h>
Guido van Rossume22e6441993-07-09 10:51:31 +000028/* XXX This file assumes that the <ctype.h> is*() functions
29 XXX are defined for all 8-bit characters! */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000030
/* lstrip(), rstrip() and strip() all funnel into do_strip(); the
   constants below are the values of its striptype argument and select
   which end(s) of the string get trimmed. */
enum {
	LEFTSTRIP = 0,
	RIGHTSTRIP = 1,
	BOTHSTRIP = 2
};
38
Guido van Rossume270b431992-09-03 20:21:07 +000039
Barry Warsawf5256011996-12-09 18:35:56 +000040static PyObject *
Guido van Rossum7999a5c1996-08-08 19:16:15 +000041split_whitespace(s, len, maxsplit)
Guido van Rossume270b431992-09-03 20:21:07 +000042 char *s;
Guido van Rossum009e79b1995-05-03 17:40:23 +000043 int len;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000044 int maxsplit;
Guido van Rossum009e79b1995-05-03 17:40:23 +000045{
Barry Warsawe8fc29c1997-01-03 22:45:34 +000046 int i = 0, j, err;
47 int countsplit = 0;
48 PyObject* item;
49 PyObject *list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +000050
Guido van Rossume270b431992-09-03 20:21:07 +000051 if (list == NULL)
52 return NULL;
53
Guido van Rossume270b431992-09-03 20:21:07 +000054 while (i < len) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +000055 while (i < len && isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000056 i = i+1;
57 }
58 j = i;
Guido van Rossumee1813d1995-02-14 00:58:59 +000059 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000060 i = i+1;
61 }
62 if (j < i) {
Barry Warsawf5256011996-12-09 18:35:56 +000063 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000064 if (item == NULL)
65 goto finally;
66
Barry Warsawf5256011996-12-09 18:35:56 +000067 err = PyList_Append(list, item);
68 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000069 if (err < 0)
70 goto finally;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000071
72 countsplit++;
Barry Warsaw93be92d1997-12-02 00:29:30 +000073 while (i < len && isspace(Py_CHARMASK(s[i]))) {
74 i = i+1;
75 }
76 if (maxsplit && (countsplit >= maxsplit) && i < len) {
Barry Warsawf5256011996-12-09 18:35:56 +000077 item = PyString_FromStringAndSize(
78 s+i, (int)(len - i));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000079 if (item == NULL)
80 goto finally;
81
Barry Warsawf5256011996-12-09 18:35:56 +000082 err = PyList_Append(list, item);
83 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000084 if (err < 0)
85 goto finally;
86
Guido van Rossum7999a5c1996-08-08 19:16:15 +000087 i = len;
88 }
Guido van Rossume270b431992-09-03 20:21:07 +000089 }
90 }
Guido van Rossume270b431992-09-03 20:21:07 +000091 return list;
Barry Warsawe8fc29c1997-01-03 22:45:34 +000092 finally:
93 Py_DECREF(list);
94 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +000095}
96
97
/* Docstring shared by split() and splitfields().  The maxsplit sentence
   describes what the code does: it stops after maxsplit splits and then
   appends the remainder, so up to maxsplit+1 words can come back. */
static char splitfields__doc__[] =
"split(s [,sep [,maxsplit]]) -> list of strings\n\
splitfields(s [,sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string s, using sep as the\n\
delimiter string. If maxsplit is nonzero, at most maxsplit splits\n\
are done, so the list has at most maxsplit+1 words. If sep is not\n\
specified, any whitespace string is a separator. Maxsplit defaults\n\
to 0.\n\
\n\
(split and splitfields are synonymous)";
108
Barry Warsawf5256011996-12-09 18:35:56 +0000109static PyObject *
Guido van Rossume270b431992-09-03 20:21:07 +0000110strop_splitfields(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000111 PyObject *self; /* Not used */
112 PyObject *args;
Guido van Rossume270b431992-09-03 20:21:07 +0000113{
Guido van Rossum572d2d91993-11-05 10:14:49 +0000114 int len, n, i, j, err;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000115 int splitcount, maxsplit;
Guido van Rossume270b431992-09-03 20:21:07 +0000116 char *s, *sub;
Barry Warsawf5256011996-12-09 18:35:56 +0000117 PyObject *list, *item;
Guido van Rossume270b431992-09-03 20:21:07 +0000118
Guido van Rossum009e79b1995-05-03 17:40:23 +0000119 sub = NULL;
120 n = 0;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000121 splitcount = 0;
122 maxsplit = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000123 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
Guido van Rossume270b431992-09-03 20:21:07 +0000124 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000125 if (sub == NULL)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000126 return split_whitespace(s, len, maxsplit);
Guido van Rossume270b431992-09-03 20:21:07 +0000127 if (n == 0) {
Barry Warsawf5256011996-12-09 18:35:56 +0000128 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossume270b431992-09-03 20:21:07 +0000129 return NULL;
130 }
131
Barry Warsawf5256011996-12-09 18:35:56 +0000132 list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +0000133 if (list == NULL)
134 return NULL;
135
136 i = j = 0;
137 while (i+n <= len) {
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000138 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000139 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000140 if (item == NULL)
141 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000142 err = PyList_Append(list, item);
143 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000144 if (err < 0)
145 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000146 i = j = i + n;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000147 splitcount++;
148 if (maxsplit && (splitcount >= maxsplit))
149 break;
Guido van Rossume270b431992-09-03 20:21:07 +0000150 }
151 else
152 i++;
153 }
Barry Warsawf5256011996-12-09 18:35:56 +0000154 item = PyString_FromStringAndSize(s+j, (int)(len-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000155 if (item == NULL)
156 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000157 err = PyList_Append(list, item);
158 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000159 if (err < 0)
160 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000161
162 return list;
Guido van Rossum572d2d91993-11-05 10:14:49 +0000163
164 fail:
Barry Warsawf5256011996-12-09 18:35:56 +0000165 Py_DECREF(list);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000166 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +0000167}
168
169
/* Docstring shared by join() and joinfields(). */
static char joinfields__doc__[] =
"join(list [,sep]) -> string\n\
joinfields(list [,sep]) -> string\n\
\n\
Return a string composed of the words in list, with\n\
intervening occurrences of sep. Sep defaults to a single\n\
space.\n\
\n\
(join and joinfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +0000179
Barry Warsawf5256011996-12-09 18:35:56 +0000180static PyObject *
Guido van Rossumc89705d1992-11-26 08:54:07 +0000181strop_joinfields(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000182 PyObject *self; /* Not used */
183 PyObject *args;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000184{
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000185 PyObject *seq;
186 char *sep = NULL;
187 int seqlen, seplen = 0;
188 int i, reslen = 0, slen = 0, sz = 100;
189 PyObject *res = NULL;
190 char* p = NULL;
191 intargfunc getitemfunc;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000192
Guido van Rossum43713e52000-02-29 13:59:29 +0000193 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
Guido van Rossumc89705d1992-11-26 08:54:07 +0000194 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000195 if (sep == NULL) {
196 sep = " ";
197 seplen = 1;
198 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000199
200 seqlen = PySequence_Length(seq);
201 if (seqlen < 0 && PyErr_Occurred())
202 return NULL;
203
204 if (seqlen == 1) {
205 /* Optimization if there's only one item */
206 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000207 if (item && !PyString_Check(item)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000208 PyErr_SetString(PyExc_TypeError,
209 "first argument must be sequence of strings");
Guido van Rossumbf338301998-10-19 13:38:36 +0000210 Py_DECREF(item);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000211 return NULL;
212 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000213 return item;
214 }
215
216 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
217 return NULL;
218 p = PyString_AsString(res);
219
220 /* optimize for lists, since it's the most common case. all others
221 * (tuples and arbitrary sequences) just use the sequence abstract
222 * interface.
Barry Warsaw04d2d151997-01-03 23:46:51 +0000223 */
224 if (PyList_Check(seq)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000225 for (i = 0; i < seqlen; i++) {
226 PyObject *item = PyList_GET_ITEM(seq, i);
227 if (!PyString_Check(item)) {
228 PyErr_SetString(PyExc_TypeError,
229 "first argument must be sequence of strings");
230 Py_DECREF(res);
231 return NULL;
232 }
233 slen = PyString_GET_SIZE(item);
234 while (reslen + slen + seplen >= sz) {
235 if (_PyString_Resize(&res, sz * 2)) {
236 Py_DECREF(res);
237 return NULL;
238 }
239 sz *= 2;
240 p = PyString_AsString(res) + reslen;
241 }
242 if (i > 0) {
243 memcpy(p, sep, seplen);
244 p += seplen;
245 reslen += seplen;
246 }
247 memcpy(p, PyString_AS_STRING(item), slen);
248 p += slen;
249 reslen += slen;
250 }
251 if (_PyString_Resize(&res, reslen)) {
252 Py_DECREF(res);
253 res = NULL;
254 }
255 return res;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000256 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000257
258 if (seq->ob_type->tp_as_sequence == NULL ||
259 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
260 {
Barry Warsawf5256011996-12-09 18:35:56 +0000261 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000262 "first argument must be a sequence");
Guido van Rossumc89705d1992-11-26 08:54:07 +0000263 return NULL;
264 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000265 /* This is now type safe */
Guido van Rossumc89705d1992-11-26 08:54:07 +0000266 for (i = 0; i < seqlen; i++) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000267 PyObject *item = getitemfunc(seq, i);
268 if (!item || !PyString_Check(item)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000269 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000270 "first argument must be sequence of strings");
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000271 Py_DECREF(res);
272 Py_XDECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000273 return NULL;
274 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000275 slen = PyString_GET_SIZE(item);
276 while (reslen + slen + seplen >= sz) {
277 if (_PyString_Resize(&res, sz * 2)) {
278 Py_DECREF(res);
279 Py_DECREF(item);
280 return NULL;
281 }
282 sz *= 2;
283 p = PyString_AsString(res) + reslen;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000284 }
Guido van Rossumc89705d1992-11-26 08:54:07 +0000285 if (i > 0) {
286 memcpy(p, sep, seplen);
287 p += seplen;
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000288 reslen += seplen;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000289 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000290 memcpy(p, PyString_AS_STRING(item), slen);
291 p += slen;
292 reslen += slen;
293 Py_DECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000294 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000295 if (_PyString_Resize(&res, reslen)) {
296 Py_DECREF(res);
297 res = NULL;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000298 }
299 return res;
300}
301
Guido van Rossum983c9301997-12-29 19:52:29 +0000302
/* Docstring for find(). */
static char find__doc__[] =
"find(s, sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in s where substring sub is found,\n\
such that sub is contained within s[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
311
Barry Warsawf5256011996-12-09 18:35:56 +0000312static PyObject *
Guido van Rossum5806a4f1994-08-17 13:15:46 +0000313strop_find(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000314 PyObject *self; /* Not used */
315 PyObject *args;
Guido van Rossume270b431992-09-03 20:21:07 +0000316{
317 char *s, *sub;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000318 int len, n, i = 0, last = INT_MAX;
Guido van Rossume270b431992-09-03 20:21:07 +0000319
Guido van Rossum43713e52000-02-29 13:59:29 +0000320 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000321 return NULL;
322
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000323 if (last > len)
324 last = len;
325 if (last < 0)
326 last += len;
327 if (last < 0)
328 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000329 if (i < 0)
330 i += len;
331 if (i < 0)
Guido van Rossume270b431992-09-03 20:21:07 +0000332 i = 0;
Guido van Rossume270b431992-09-03 20:21:07 +0000333
Guido van Rossum031c6311998-03-24 04:19:22 +0000334 if (n == 0 && i <= last)
Barry Warsawf5256011996-12-09 18:35:56 +0000335 return PyInt_FromLong((long)i);
Guido van Rossume270b431992-09-03 20:21:07 +0000336
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000337 last -= n;
338 for (; i <= last; ++i)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000339 if (s[i] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000340 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000341 return PyInt_FromLong((long)i);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000342
Barry Warsawf5256011996-12-09 18:35:56 +0000343 return PyInt_FromLong(-1L);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000344}
345
346
/* Docstring for rfind(). */
static char rfind__doc__[] =
"rfind(s, sub [,start [,end]]) -> int\n\
\n\
Return the highest index in s where substring sub is found,\n\
such that sub is contained within s[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
355
Barry Warsawf5256011996-12-09 18:35:56 +0000356static PyObject *
Guido van Rossum5806a4f1994-08-17 13:15:46 +0000357strop_rfind(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000358 PyObject *self; /* Not used */
359 PyObject *args;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000360{
361 char *s, *sub;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000362 int len, n, j;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000363 int i = 0, last = INT_MAX;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000364
Guido van Rossum43713e52000-02-29 13:59:29 +0000365 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000366 return NULL;
367
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000368 if (last > len)
369 last = len;
370 if (last < 0)
371 last += len;
372 if (last < 0)
373 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000374 if (i < 0)
375 i += len;
376 if (i < 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000377 i = 0;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000378
Guido van Rossum031c6311998-03-24 04:19:22 +0000379 if (n == 0 && i <= last)
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000380 return PyInt_FromLong((long)last);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000381
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000382 for (j = last-n; j >= i; --j)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000383 if (s[j] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000384 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000385 return PyInt_FromLong((long)j);
Guido van Rossume270b431992-09-03 20:21:07 +0000386
Barry Warsawf5256011996-12-09 18:35:56 +0000387 return PyInt_FromLong(-1L);
Guido van Rossume270b431992-09-03 20:21:07 +0000388}
389
Guido van Rossum983c9301997-12-29 19:52:29 +0000390
Barry Warsawf5256011996-12-09 18:35:56 +0000391static PyObject *
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000392do_strip(args, striptype)
Barry Warsawf5256011996-12-09 18:35:56 +0000393 PyObject *args;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000394 int striptype;
395{
396 char *s;
397 int len, i, j;
398
399
Guido van Rossum7e488981998-10-08 02:25:24 +0000400 if (!PyArg_Parse(args, "t#", &s, &len))
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000401 return NULL;
402
403 i = 0;
404 if (striptype != RIGHTSTRIP) {
405 while (i < len && isspace(Py_CHARMASK(s[i]))) {
406 i++;
407 }
408 }
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000409
410 j = len;
411 if (striptype != LEFTSTRIP) {
412 do {
413 j--;
414 } while (j >= i && isspace(Py_CHARMASK(s[j])));
415 j++;
416 }
417
418 if (i == 0 && j == len) {
Barry Warsawf5256011996-12-09 18:35:56 +0000419 Py_INCREF(args);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000420 return args;
421 }
422 else
Barry Warsawf5256011996-12-09 18:35:56 +0000423 return PyString_FromStringAndSize(s+i, j-i);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000424}
425
Guido van Rossume270b431992-09-03 20:21:07 +0000426
/* Docstring for strip(). */
static char strip__doc__[] =
	"strip(s) -> string\n"
	"\n"
	"Return a copy of the string s with leading and trailing\n"
	"whitespace removed.";
432
Barry Warsawf5256011996-12-09 18:35:56 +0000433static PyObject *
Guido van Rossume270b431992-09-03 20:21:07 +0000434strop_strip(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000435 PyObject *self; /* Not used */
436 PyObject *args;
Guido van Rossume270b431992-09-03 20:21:07 +0000437{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000438 return do_strip(args, BOTHSTRIP);
439}
Guido van Rossume270b431992-09-03 20:21:07 +0000440
Guido van Rossum983c9301997-12-29 19:52:29 +0000441
/* Docstring for lstrip(). */
static char lstrip__doc__[] =
	"lstrip(s) -> string\n"
	"\n"
	"Return a copy of the string s with leading whitespace removed.";
446
Barry Warsawf5256011996-12-09 18:35:56 +0000447static PyObject *
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000448strop_lstrip(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000449 PyObject *self; /* Not used */
450 PyObject *args;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000451{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000452 return do_strip(args, LEFTSTRIP);
453}
Guido van Rossume270b431992-09-03 20:21:07 +0000454
Guido van Rossum983c9301997-12-29 19:52:29 +0000455
/* Docstring for rstrip(). */
static char rstrip__doc__[] =
	"rstrip(s) -> string\n"
	"\n"
	"Return a copy of the string s with trailing whitespace removed.";
460
Barry Warsawf5256011996-12-09 18:35:56 +0000461static PyObject *
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000462strop_rstrip(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000463 PyObject *self; /* Not used */
464 PyObject *args;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000465{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000466 return do_strip(args, RIGHTSTRIP);
Guido van Rossume270b431992-09-03 20:21:07 +0000467}
468
469
/* Docstring for lower(). */
static char lower__doc__[] =
	"lower(s) -> string\n"
	"\n"
	"Return a copy of the string s converted to lowercase.";
474
Barry Warsawf5256011996-12-09 18:35:56 +0000475static PyObject *
Barry Warsaw04d2d151997-01-03 23:46:51 +0000476strop_lower(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000477 PyObject *self; /* Not used */
478 PyObject *args;
Guido van Rossum5c850621992-09-11 23:55:51 +0000479{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000480 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000481 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000482 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000483 int changed;
484
Guido van Rossum7e488981998-10-08 02:25:24 +0000485 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000486 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000487 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000488 if (new == NULL)
489 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000490 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000491 changed = 0;
492 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000493 int c = Py_CHARMASK(*s++);
Barry Warsaw04d2d151997-01-03 23:46:51 +0000494 if (isupper(c)) {
Guido van Rossum5c850621992-09-11 23:55:51 +0000495 changed = 1;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000496 *s_new = tolower(c);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000497 } else
498 *s_new = c;
499 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000500 }
501 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000502 Py_DECREF(new);
503 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000504 return args;
505 }
506 return new;
507}
508
509
/* Docstring for upper(). */
static char upper__doc__[] =
	"upper(s) -> string\n"
	"\n"
	"Return a copy of the string s converted to uppercase.";
514
Barry Warsawf5256011996-12-09 18:35:56 +0000515static PyObject *
Guido van Rossum5c850621992-09-11 23:55:51 +0000516strop_upper(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000517 PyObject *self; /* Not used */
518 PyObject *args;
Guido van Rossum5c850621992-09-11 23:55:51 +0000519{
Barry Warsaw04d2d151997-01-03 23:46:51 +0000520 char *s, *s_new;
521 int i, n;
522 PyObject *new;
523 int changed;
524
Guido van Rossum7e488981998-10-08 02:25:24 +0000525 if (!PyArg_Parse(args, "t#", &s, &n))
Barry Warsaw04d2d151997-01-03 23:46:51 +0000526 return NULL;
527 new = PyString_FromStringAndSize(NULL, n);
528 if (new == NULL)
529 return NULL;
530 s_new = PyString_AsString(new);
531 changed = 0;
532 for (i = 0; i < n; i++) {
533 int c = Py_CHARMASK(*s++);
534 if (islower(c)) {
535 changed = 1;
536 *s_new = toupper(c);
537 } else
538 *s_new = c;
539 s_new++;
540 }
541 if (!changed) {
542 Py_DECREF(new);
543 Py_INCREF(args);
544 return args;
545 }
546 return new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000547}
548
549
/* Docstring for capitalize(). */
static char capitalize__doc__[] =
	"capitalize(s) -> string\n"
	"\n"
	"Return a copy of the string s with only its first character\n"
	"capitalized.";
555
Barry Warsawf5256011996-12-09 18:35:56 +0000556static PyObject *
Guido van Rossum27457531996-06-12 04:24:52 +0000557strop_capitalize(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000558 PyObject *self; /* Not used */
559 PyObject *args;
Guido van Rossum27457531996-06-12 04:24:52 +0000560{
561 char *s, *s_new;
562 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000563 PyObject *new;
Guido van Rossum27457531996-06-12 04:24:52 +0000564 int changed;
565
Guido van Rossum7e488981998-10-08 02:25:24 +0000566 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum27457531996-06-12 04:24:52 +0000567 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000568 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum27457531996-06-12 04:24:52 +0000569 if (new == NULL)
570 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000571 s_new = PyString_AsString(new);
Guido van Rossum27457531996-06-12 04:24:52 +0000572 changed = 0;
Guido van Rossum529c9631996-06-17 16:59:33 +0000573 if (0 < n) {
Guido van Rossum27457531996-06-12 04:24:52 +0000574 int c = Py_CHARMASK(*s++);
575 if (islower(c)) {
576 changed = 1;
577 *s_new = toupper(c);
578 } else
579 *s_new = c;
580 s_new++;
581 }
582 for (i = 1; i < n; i++) {
583 int c = Py_CHARMASK(*s++);
584 if (isupper(c)) {
585 changed = 1;
586 *s_new = tolower(c);
587 } else
588 *s_new = c;
589 s_new++;
590 }
591 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000592 Py_DECREF(new);
593 Py_INCREF(args);
Guido van Rossum27457531996-06-12 04:24:52 +0000594 return args;
595 }
596 return new;
597}
598
599
/* Docstring for expandtabs(). */
static char expandtabs__doc__[] =
	"expandtabs(string, [tabsize]) -> string\n"
	"\n"
	"Expand tabs in a string, i.e. replace them by one or more spaces,\n"
	"depending on the current column and the given tab size (default 8).\n"
	"The column number is reset to zero after each newline occurring in the\n"
	"string. This doesn't understand other non-printing characters.";
607
608static PyObject *
609strop_expandtabs(self, args)
610 PyObject *self;
611 PyObject *args;
612{
613 /* Original by Fredrik Lundh */
614 char* e;
615 char* p;
616 char* q;
617 int i, j;
618 PyObject* out;
619 char* string;
620 int stringlen;
621 int tabsize = 8;
622
623 /* Get arguments */
Guido van Rossum43713e52000-02-29 13:59:29 +0000624 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
Guido van Rossum54ec2881999-01-25 22:36:24 +0000625 return NULL;
626 if (tabsize < 1) {
627 PyErr_SetString(PyExc_ValueError,
628 "tabsize must be at least 1");
629 return NULL;
630 }
631
632 /* First pass: determine size of output string */
633 i = j = 0; /* j: current column; i: total of previous lines */
634 e = string + stringlen;
635 for (p = string; p < e; p++) {
636 if (*p == '\t')
637 j += tabsize - (j%tabsize);
638 else {
639 j++;
640 if (*p == '\n') {
641 i += j;
642 j = 0;
643 }
644 }
645 }
646
647 /* Second pass: create output string and fill it */
648 out = PyString_FromStringAndSize(NULL, i+j);
649 if (out == NULL)
650 return NULL;
651
652 i = 0;
653 q = PyString_AS_STRING(out);
654
655 for (p = string; p < e; p++) {
656 if (*p == '\t') {
657 j = tabsize - (i%tabsize);
658 i += j;
659 while (j-- > 0)
660 *q++ = ' ';
661 } else {
662 *q++ = *p;
663 i++;
664 if (*p == '\n')
665 i = 0;
666 }
667 }
668
669 return out;
670}
671
672
/* Docstring for count(). */
static char count__doc__[] =
	"count(s, sub[, start[, end]]) -> int\n"
	"\n"
	"Return the number of occurrences of substring sub in string\n"
	"s[start:end]. Optional arguments start and end are\n"
	"interpreted as in slice notation.";
679
680static PyObject *
681strop_count(self, args)
682 PyObject *self; /* Not used */
683 PyObject *args;
684{
685 char *s, *sub;
Guido van Rossumc5015831998-10-07 16:36:14 +0000686 int len, n;
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000687 int i = 0, last = INT_MAX;
688 int m, r;
689
Guido van Rossum43713e52000-02-29 13:59:29 +0000690 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000691 return NULL;
692 if (last > len)
693 last = len;
694 if (last < 0)
695 last += len;
696 if (last < 0)
697 last = 0;
698 if (i < 0)
699 i += len;
700 if (i < 0)
701 i = 0;
702 m = last + 1 - n;
703 if (n == 0)
704 return PyInt_FromLong((long) (m-i));
705
706 r = 0;
707 while (i < m) {
708 if (!memcmp(s+i, sub, n)) {
709 r++;
710 i += n;
711 } else {
712 i++;
713 }
714 }
715 return PyInt_FromLong((long) r);
716}
717
718
/* Docstring for swapcase(). */
static char swapcase__doc__[] =
	"swapcase(s) -> string\n"
	"\n"
	"Return a copy of the string s with upper case characters\n"
	"converted to lowercase and vice versa.";
724
Barry Warsawf5256011996-12-09 18:35:56 +0000725static PyObject *
Guido van Rossum5c850621992-09-11 23:55:51 +0000726strop_swapcase(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000727 PyObject *self; /* Not used */
728 PyObject *args;
Guido van Rossum5c850621992-09-11 23:55:51 +0000729{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000730 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000731 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000732 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000733 int changed;
734
Guido van Rossum7e488981998-10-08 02:25:24 +0000735 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000736 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000737 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000738 if (new == NULL)
739 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000740 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000741 changed = 0;
742 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000743 int c = Py_CHARMASK(*s++);
Guido van Rossum5c850621992-09-11 23:55:51 +0000744 if (islower(c)) {
745 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000746 *s_new = toupper(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000747 }
748 else if (isupper(c)) {
749 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000750 *s_new = tolower(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000751 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000752 else
753 *s_new = c;
754 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000755 }
756 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000757 Py_DECREF(new);
758 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000759 return args;
760 }
761 return new;
762}
763
764
/* Docstring for atoi(). */
static char atoi__doc__[] =
	"atoi(s [,base]) -> int\n"
	"\n"
	"Return the integer represented by the string s in the given\n"
	"base, which defaults to 10. The string s must consist of one\n"
	"or more digits, possibly preceded by a sign. If base is 0, it\n"
	"is chosen from the leading characters of s, 0 for octal, 0x or\n"
	"0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
	"accepted.";
774
Barry Warsawf5256011996-12-09 18:35:56 +0000775static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000776strop_atoi(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000777 PyObject *self; /* Not used */
778 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000779{
Guido van Rossumb6775db1994-08-01 11:34:53 +0000780 char *s, *end;
781 int base = 10;
782 long x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000783 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000784
Guido van Rossum43713e52000-02-29 13:59:29 +0000785 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000786 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000787
788 if ((base != 0 && base < 2) || base > 36) {
789 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
790 return NULL;
791 }
792
Guido van Rossumc35f9331996-09-11 23:30:42 +0000793 while (*s && isspace(Py_CHARMASK(*s)))
794 s++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000795 errno = 0;
796 if (base == 0 && s[0] == '0')
Barry Warsawf5256011996-12-09 18:35:56 +0000797 x = (long) PyOS_strtoul(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000798 else
Barry Warsawf5256011996-12-09 18:35:56 +0000799 x = PyOS_strtol(s, &end, base);
Guido van Rossum5bd69db1999-02-22 16:18:44 +0000800 if (end == s || !isalnum(end[-1]))
Guido van Rossum923fece51998-08-04 15:04:52 +0000801 goto bad;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000802 while (*end && isspace(Py_CHARMASK(*end)))
803 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000804 if (*end != '\0') {
Guido van Rossum923fece51998-08-04 15:04:52 +0000805 bad:
Guido van Rossumc35f9331996-09-11 23:30:42 +0000806 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000807 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000808 return NULL;
809 }
810 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000811 sprintf(buffer, "atoi() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000812 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000813 return NULL;
814 }
Barry Warsawf5256011996-12-09 18:35:56 +0000815 return PyInt_FromLong(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000816}
817
818
Guido van Rossum983c9301997-12-29 19:52:29 +0000819static char atol__doc__[] =
820"atol(s [,base]) -> long\n\
821\n\
822Return the long integer represented by the string s in the\n\
823given base, which defaults to 10. The string s must consist\n\
824of one or more digits, possibly preceded by a sign. If base\n\
825is 0, it is chosen from the leading characters of s, 0 for\n\
826octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
8270x or 0X is accepted. A trailing L or l is not accepted,\n\
828unless base is 0.";
829
Barry Warsawf5256011996-12-09 18:35:56 +0000830static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000831strop_atol(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000832 PyObject *self; /* Not used */
833 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000834{
835 char *s, *end;
836 int base = 10;
Barry Warsawf5256011996-12-09 18:35:56 +0000837 PyObject *x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000838 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000839
Guido van Rossum43713e52000-02-29 13:59:29 +0000840 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000841 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000842
843 if ((base != 0 && base < 2) || base > 36) {
844 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
845 return NULL;
846 }
847
Guido van Rossumc35f9331996-09-11 23:30:42 +0000848 while (*s && isspace(Py_CHARMASK(*s)))
849 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000850 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000851 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000852 return NULL;
853 }
Barry Warsawf5256011996-12-09 18:35:56 +0000854 x = PyLong_FromString(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000855 if (x == NULL)
856 return NULL;
857 if (base == 0 && (*end == 'l' || *end == 'L'))
858 end++;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000859 while (*end && isspace(Py_CHARMASK(*end)))
860 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000861 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000862 sprintf(buffer, "invalid literal for atol(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000863 PyErr_SetString(PyExc_ValueError, buffer);
864 Py_DECREF(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000865 return NULL;
866 }
867 return x;
868}
869
870
Guido van Rossum983c9301997-12-29 19:52:29 +0000871static char atof__doc__[] =
872"atof(s) -> float\n\
873\n\
874Return the floating point number represented by the string s.";
875
Barry Warsawf5256011996-12-09 18:35:56 +0000876static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000877strop_atof(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000878 PyObject *self; /* Not used */
879 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000880{
Tim Petersdbd9ba62000-07-09 03:09:57 +0000881 extern double strtod(const char *, char **);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000882 char *s, *end;
883 double x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000884 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000885
Guido van Rossum43713e52000-02-29 13:59:29 +0000886 if (!PyArg_ParseTuple(args, "s:atof", &s))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000887 return NULL;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000888 while (*s && isspace(Py_CHARMASK(*s)))
889 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000890 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000891 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000892 return NULL;
893 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000894 errno = 0;
Guido van Rossum52fa3a61997-02-14 22:59:58 +0000895 PyFPE_START_PROTECT("strop_atof", return 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000896 x = strtod(s, &end);
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000897 PyFPE_END_PROTECT(x)
Guido van Rossumc35f9331996-09-11 23:30:42 +0000898 while (*end && isspace(Py_CHARMASK(*end)))
899 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000900 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000901 sprintf(buffer, "invalid literal for atof(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000902 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000903 return NULL;
904 }
905 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000906 sprintf(buffer, "atof() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000907 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000908 return NULL;
909 }
Barry Warsawf5256011996-12-09 18:35:56 +0000910 return PyFloat_FromDouble(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000911}
912
913
Guido van Rossum983c9301997-12-29 19:52:29 +0000914static char maketrans__doc__[] =
915"maketrans(frm, to) -> string\n\
916\n\
917Return a translation table (a string of 256 bytes long)\n\
918suitable for use in string.translate. The strings frm and to\n\
919must be of the same length.";
920
Guido van Rossumed7253c1996-07-23 18:12:39 +0000921static PyObject *
922strop_maketrans(self, args)
923 PyObject *self; /* Not used */
924 PyObject *args;
925{
Guido van Rossume0548b81997-01-06 16:50:09 +0000926 unsigned char *c, *from=NULL, *to=NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000927 int i, fromlen=0, tolen=0;
Guido van Rossume0548b81997-01-06 16:50:09 +0000928 PyObject *result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000929
Guido van Rossum43713e52000-02-29 13:59:29 +0000930 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000931 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000932
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000933 if (fromlen != tolen) {
Barry Warsawf5256011996-12-09 18:35:56 +0000934 PyErr_SetString(PyExc_ValueError,
Guido van Rossumed7253c1996-07-23 18:12:39 +0000935 "maketrans arguments must have same length");
936 return NULL;
937 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000938
939 result = PyString_FromStringAndSize((char *)NULL, 256);
940 if (result == NULL)
941 return NULL;
942 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000943 for (i = 0; i < 256; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000944 c[i]=(unsigned char)i;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000945 for (i = 0; i < fromlen; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000946 c[from[i]]=to[i];
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000947
Guido van Rossume0548b81997-01-06 16:50:09 +0000948 return result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000949}
950
951
Guido van Rossum983c9301997-12-29 19:52:29 +0000952static char translate__doc__[] =
953"translate(s,table [,deletechars]) -> string\n\
954\n\
955Return a copy of the string s, where all characters occurring\n\
956in the optional argument deletechars are removed, and the\n\
957remaining characters have been mapped through the given\n\
958translation table, which must be a string of length 256.";
959
Barry Warsawf5256011996-12-09 18:35:56 +0000960static PyObject *
Guido van Rossuma3127e81995-09-13 17:39:06 +0000961strop_translate(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000962 PyObject *self;
963 PyObject *args;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000964{
Guido van Rossume0548b81997-01-06 16:50:09 +0000965 register char *input, *table, *output;
966 register int i, c, changed = 0;
967 PyObject *input_obj;
968 char *table1, *output_start, *del_table=NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000969 int inlen, tablen, dellen = 0;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000970 PyObject *result;
Guido van Rossume0548b81997-01-06 16:50:09 +0000971 int trans_table[256];
Guido van Rossuma3127e81995-09-13 17:39:06 +0000972
Guido van Rossum43713e52000-02-29 13:59:29 +0000973 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
Guido van Rossume0548b81997-01-06 16:50:09 +0000974 &table1, &tablen, &del_table, &dellen))
Guido van Rossuma3127e81995-09-13 17:39:06 +0000975 return NULL;
976 if (tablen != 256) {
Barry Warsawf5256011996-12-09 18:35:56 +0000977 PyErr_SetString(PyExc_ValueError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000978 "translation table must be 256 characters long");
Guido van Rossuma3127e81995-09-13 17:39:06 +0000979 return NULL;
980 }
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000981
Guido van Rossume0548b81997-01-06 16:50:09 +0000982 table = table1;
983 inlen = PyString_Size(input_obj);
Guido van Rossumed7253c1996-07-23 18:12:39 +0000984 result = PyString_FromStringAndSize((char *)NULL, inlen);
Guido van Rossuma3127e81995-09-13 17:39:06 +0000985 if (result == NULL)
986 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000987 output_start = output = PyString_AsString(result);
Guido van Rossume0548b81997-01-06 16:50:09 +0000988 input = PyString_AsString(input_obj);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000989
Guido van Rossume0548b81997-01-06 16:50:09 +0000990 if (dellen == 0) {
991 /* If no deletions are required, use faster code */
992 for (i = inlen; --i >= 0; ) {
993 c = Py_CHARMASK(*input++);
994 if (Py_CHARMASK((*output++ = table[c])) != c)
995 changed = 1;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000996 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000997 if (changed)
998 return result;
999 Py_DECREF(result);
1000 Py_INCREF(input_obj);
1001 return input_obj;
Guido van Rossuma3127e81995-09-13 17:39:06 +00001002 }
Guido van Rossume0548b81997-01-06 16:50:09 +00001003
1004 for (i = 0; i < 256; i++)
1005 trans_table[i] = Py_CHARMASK(table[i]);
1006
Guido van Rossum983c9301997-12-29 19:52:29 +00001007 for (i = 0; i < dellen; i++)
Guido van Rossum1ed5e571997-04-29 21:34:16 +00001008 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Guido van Rossume0548b81997-01-06 16:50:09 +00001009
1010 for (i = inlen; --i >= 0; ) {
1011 c = Py_CHARMASK(*input++);
1012 if (trans_table[c] != -1)
1013 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1014 continue;
1015 changed = 1;
1016 }
1017 if (!changed) {
1018 Py_DECREF(result);
1019 Py_INCREF(input_obj);
1020 return input_obj;
1021 }
1022 /* Fix the size of the resulting string */
1023 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
Guido van Rossum983c9301997-12-29 19:52:29 +00001024 return NULL;
Guido van Rossuma3127e81995-09-13 17:39:06 +00001025 return result;
1026}
1027
1028
Guido van Rossum101923b1997-04-02 06:11:18 +00001029/* What follows is used for implementing replace(). Perry Stoll. */
1030
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the LEN bytes of memory pointed to by
  MEM of the PAT_LEN bytes pointed to by PAT.  Returns the index into
  MEM if found, or -1 if not found.  If PAT_LEN is greater than LEN the
  function returns -1.  An empty pattern (PAT_LEN <= 0) is never found
  and also yields -1.
*/
static int mymemfind(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    register int ii;

    /* An empty pattern has no first byte to compare against; without
       this guard the loop would read pat[0] and mem[len] and could hand
       a negative length to memcmp. */
    if (pat_len <= 0)
        return -1;

    /* pattern can not occur in the last pat_len-1 chars */
    len -= pat_len;

    for (ii = 0; ii <= len; ii++) {
        /* Cheap first-byte test before comparing the remainder */
        if (mem[ii] == pat[0] &&
            (pat_len == 1 ||
             memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
            return ii;
        }
    }
    return -1;
}
1061
/*
  mymemcnt

  Count the occurrences of PAT in MEM, scanning left to right and
  resuming each search just past the previous match, so matches never
  overlap:
      mem=1111  and pat=11 gives 2;
      mem=11111 and pat=11 also gives 2.
 */
static int mymemcnt(mem, len, pat, pat_len)
    char *mem;
    int len;
    char *pat;
    int pat_len;
{
    int nfound;
    int pos;

    for (nfound = 0; len >= 0; nfound++) {
        pos = mymemfind(mem, len, pat, pat_len);
        if (pos == -1)
            break;
        /* Step over the skipped prefix and the match itself */
        mem += pos + pat_len;
        len -= pos + pat_len;
    }
    return nfound;
}
1088
/*
  mymemreplace

  Return a string in which all occurrences of PAT in memory STR are
  replaced with SUB.  If COUNT is greater than zero, at most COUNT
  occurrences (the leftmost ones) are replaced.

  If STR is empty, the length of PAT is greater than the length of STR,
  or there are no occurrences of PAT in STR, then the original string is
  returned.  Otherwise, a new string is allocated here and returned; the
  caller owns it and must release it.

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs (no memory, or a result too long to
      be described by an int).

  return value is:
      the new string allocated locally, or
      NULL if an error occurred.
*/
static char *mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
    char *str;
    int len;     /* input string */
    char *pat;
    int pat_len; /* pattern string to find */
    char *sub;
    int sub_len; /* substitution string */
    int count;   /* number of replacements, 0 == all */
    int *out_len;

{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count > 0)
        nfound = nfound > count ? count : nfound;
    if (nfound == 0)
        goto return_same;

    /* Each replacement changes the length by delta bytes.  When the
       string grows, make sure the total still fits in an int instead
       of overflowing and under-allocating. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound*delta;

    /* new_len is 0 when the whole input is replaced by an empty
       substitute.  A zero-byte request may come back as NULL even
       though nothing failed (assuming PyMem_MALLOC behaves like
       malloc), so always ask for at least one byte. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL) return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found, break out of loop */
        if (offset == -1) break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len;    /* move str past pattern */
        len -= offset + pat_len;    /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset;             /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len); /* copy substring into new_s */
        new_s += sub_len;            /* offset new_s past sub string */

        /* break when we've done count replacements; with count == 0
           the counter goes negative and never stops the loop */
        if (--count == 0) break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return str;
}
1168
1169
Guido van Rossum983c9301997-12-29 19:52:29 +00001170static char replace__doc__[] =
1171"replace (str, old, new[, maxsplit]) -> string\n\
1172\n\
1173Return a copy of string str with all occurrences of substring\n\
1174old replaced by new. If the optional argument maxsplit is\n\
1175given, only the first maxsplit occurrences are replaced.";
1176
1177static PyObject *
Guido van Rossum101923b1997-04-02 06:11:18 +00001178strop_replace(self, args)
1179 PyObject *self; /* Not used */
1180 PyObject *args;
1181{
1182 char *str, *pat,*sub,*new_s;
1183 int len,pat_len,sub_len,out_len;
Barry Warsawf577c081997-11-29 00:10:07 +00001184 int count = 0;
Guido van Rossum101923b1997-04-02 06:11:18 +00001185 PyObject *new;
1186
Guido van Rossum43713e52000-02-29 13:59:29 +00001187 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
Barry Warsawf577c081997-11-29 00:10:07 +00001188 &str, &len, &pat, &pat_len, &sub, &sub_len,
1189 &count))
Guido van Rossum101923b1997-04-02 06:11:18 +00001190 return NULL;
Guido van Rossum4ccda151998-05-14 02:36:29 +00001191 if (pat_len <= 0) {
1192 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1193 return NULL;
1194 }
Barry Warsawf577c081997-11-29 00:10:07 +00001195 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
Guido van Rossum101923b1997-04-02 06:11:18 +00001196 if (new_s == NULL) {
1197 PyErr_NoMemory();
1198 return NULL;
1199 }
1200 if (out_len == -1) {
1201 /* we're returning another reference to the input string */
1202 new = PyTuple_GetItem(args, 0);
1203 Py_XINCREF(new);
1204 }
1205 else {
1206 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001207 PyMem_FREE(new_s);
Guido van Rossum101923b1997-04-02 06:11:18 +00001208 }
1209 return new;
1210}
1211
1212
Guido van Rossume270b431992-09-03 20:21:07 +00001213/* List of functions defined in the module */
1214
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001215static PyMethodDef
1216strop_methods[] = {
Guido van Rossum983c9301997-12-29 19:52:29 +00001217 {"atof", strop_atof, 1, atof__doc__},
1218 {"atoi", strop_atoi, 1, atoi__doc__},
1219 {"atol", strop_atol, 1, atol__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001220 {"capitalize", strop_capitalize, 0, capitalize__doc__},
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +00001221 {"count", strop_count, 1, count__doc__},
Guido van Rossum54ec2881999-01-25 22:36:24 +00001222 {"expandtabs", strop_expandtabs, 1, expandtabs__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001223 {"find", strop_find, 1, find__doc__},
1224 {"join", strop_joinfields, 1, joinfields__doc__},
1225 {"joinfields", strop_joinfields, 1, joinfields__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001226 {"lstrip", strop_lstrip, 0, lstrip__doc__},
1227 {"lower", strop_lower, 0, lower__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001228 {"maketrans", strop_maketrans, 1, maketrans__doc__},
1229 {"replace", strop_replace, 1, replace__doc__},
1230 {"rfind", strop_rfind, 1, rfind__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001231 {"rstrip", strop_rstrip, 0, rstrip__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001232 {"split", strop_splitfields, 1, splitfields__doc__},
1233 {"splitfields", strop_splitfields, 1, splitfields__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001234 {"strip", strop_strip, 0, strip__doc__},
1235 {"swapcase", strop_swapcase, 0, swapcase__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001236 {"translate", strop_translate, 1, translate__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001237 {"upper", strop_upper, 0, upper__doc__},
Guido van Rossume270b431992-09-03 20:21:07 +00001238 {NULL, NULL} /* sentinel */
1239};
1240
1241
Guido van Rossum3886bb61998-12-04 18:50:17 +00001242DL_EXPORT(void)
Guido van Rossume270b431992-09-03 20:21:07 +00001243initstrop()
1244{
Barry Warsawf5256011996-12-09 18:35:56 +00001245 PyObject *m, *d, *s;
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001246 char buf[256];
1247 int c, n;
Guido van Rossum983c9301997-12-29 19:52:29 +00001248 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1249 (PyObject*)NULL, PYTHON_API_VERSION);
Barry Warsawf5256011996-12-09 18:35:56 +00001250 d = PyModule_GetDict(m);
Guido van Rossume22e6441993-07-09 10:51:31 +00001251
1252 /* Create 'whitespace' object */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001253 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001254 for (c = 0; c < 256; c++) {
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001255 if (isspace(c))
1256 buf[n++] = c;
1257 }
Barry Warsawf5256011996-12-09 18:35:56 +00001258 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001259 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001260 PyDict_SetItemString(d, "whitespace", s);
1261 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001262 }
1263 /* Create 'lowercase' object */
1264 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001265 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001266 if (islower(c))
1267 buf[n++] = c;
1268 }
Barry Warsawf5256011996-12-09 18:35:56 +00001269 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001270 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001271 PyDict_SetItemString(d, "lowercase", s);
1272 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001273 }
1274
1275 /* Create 'uppercase' object */
1276 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001277 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001278 if (isupper(c))
1279 buf[n++] = c;
1280 }
Barry Warsawf5256011996-12-09 18:35:56 +00001281 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001282 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001283 PyDict_SetItemString(d, "uppercase", s);
1284 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001285 }
1286
Barry Warsawf5256011996-12-09 18:35:56 +00001287 if (PyErr_Occurred())
1288 Py_FatalError("can't initialize module strop");
Guido van Rossume270b431992-09-03 20:21:07 +00001289}