blob: bd56ee03f73eb202e55a45c79d2f4e2a8f1bcce4 [file] [log] [blame]
Guido van Rossume270b431992-09-03 20:21:07 +00001/* strop module */
2
/* Documentation text for the strop module as a whole. */
static char strop_module__doc__[] =
    "Common string manipulations, optimized for speed.\n\n"
    "Always use \"import string\" rather than referencing\n"
    "this module directly.";
Guido van Rossum983c9301997-12-29 19:52:29 +00008
Barry Warsawf5256011996-12-09 18:35:56 +00009#include "Python.h"
Guido van Rossume270b431992-09-03 20:21:07 +000010
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000011#include <ctype.h>
Guido van Rossume22e6441993-07-09 10:51:31 +000012/* XXX This file assumes that the <ctype.h> is*() functions
13 XXX are defined for all 8-bit characters! */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000014
/* Issue the module's DeprecationWarning; if PyErr_Warn() reports failure
   (non-zero), return NULL from the function that used the macro.
   The body is wrapped in do { } while (0) so that "WARN;" is exactly one
   statement and can never pair up with an "else" that follows it. */
#define WARN \
    do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
                "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
18
/* lstrip(), rstrip() and strip() share a single implementation,
   do_strip().  Its extra argument is one of the selectors below and
   tells it which kind of strip to perform. */
enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
26
Guido van Rossume270b431992-09-03 20:21:07 +000027
Barry Warsawf5256011996-12-09 18:35:56 +000028static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000029split_whitespace(char *s, int len, int maxsplit)
Guido van Rossum009e79b1995-05-03 17:40:23 +000030{
Barry Warsawe8fc29c1997-01-03 22:45:34 +000031 int i = 0, j, err;
32 int countsplit = 0;
33 PyObject* item;
34 PyObject *list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +000035
Guido van Rossume270b431992-09-03 20:21:07 +000036 if (list == NULL)
37 return NULL;
38
Guido van Rossume270b431992-09-03 20:21:07 +000039 while (i < len) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +000040 while (i < len && isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000041 i = i+1;
42 }
43 j = i;
Guido van Rossumee1813d1995-02-14 00:58:59 +000044 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000045 i = i+1;
46 }
47 if (j < i) {
Barry Warsawf5256011996-12-09 18:35:56 +000048 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000049 if (item == NULL)
50 goto finally;
51
Barry Warsawf5256011996-12-09 18:35:56 +000052 err = PyList_Append(list, item);
53 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000054 if (err < 0)
55 goto finally;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000056
57 countsplit++;
Barry Warsaw93be92d1997-12-02 00:29:30 +000058 while (i < len && isspace(Py_CHARMASK(s[i]))) {
59 i = i+1;
60 }
61 if (maxsplit && (countsplit >= maxsplit) && i < len) {
Barry Warsawf5256011996-12-09 18:35:56 +000062 item = PyString_FromStringAndSize(
63 s+i, (int)(len - i));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000064 if (item == NULL)
65 goto finally;
66
Barry Warsawf5256011996-12-09 18:35:56 +000067 err = PyList_Append(list, item);
68 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000069 if (err < 0)
70 goto finally;
71
Guido van Rossum7999a5c1996-08-08 19:16:15 +000072 i = len;
73 }
Guido van Rossume270b431992-09-03 20:21:07 +000074 }
75 }
Guido van Rossume270b431992-09-03 20:21:07 +000076 return list;
Barry Warsawe8fc29c1997-01-03 22:45:34 +000077 finally:
78 Py_DECREF(list);
79 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +000080}
81
82
Guido van Rossum983c9301997-12-29 19:52:29 +000083static char splitfields__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +000084"split(s [,sep [,maxsplit]]) -> list of strings\n"
85"splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
86"\n"
87"Return a list of the words in the string s, using sep as the\n"
88"delimiter string. If maxsplit is nonzero, splits into at most\n"
89"maxsplit words. If sep is not specified, any whitespace string\n"
90"is a separator. Maxsplit defaults to 0.\n"
91"\n"
92"(split and splitfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +000093
Barry Warsawf5256011996-12-09 18:35:56 +000094static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000095strop_splitfields(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +000096{
Guido van Rossum572d2d91993-11-05 10:14:49 +000097 int len, n, i, j, err;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000098 int splitcount, maxsplit;
Guido van Rossume270b431992-09-03 20:21:07 +000099 char *s, *sub;
Barry Warsawf5256011996-12-09 18:35:56 +0000100 PyObject *list, *item;
Guido van Rossume270b431992-09-03 20:21:07 +0000101
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000102 WARN;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000103 sub = NULL;
104 n = 0;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000105 splitcount = 0;
106 maxsplit = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000107 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
Guido van Rossume270b431992-09-03 20:21:07 +0000108 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000109 if (sub == NULL)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000110 return split_whitespace(s, len, maxsplit);
Guido van Rossume270b431992-09-03 20:21:07 +0000111 if (n == 0) {
Barry Warsawf5256011996-12-09 18:35:56 +0000112 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossume270b431992-09-03 20:21:07 +0000113 return NULL;
114 }
115
Barry Warsawf5256011996-12-09 18:35:56 +0000116 list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +0000117 if (list == NULL)
118 return NULL;
119
120 i = j = 0;
121 while (i+n <= len) {
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000122 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000123 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000124 if (item == NULL)
125 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000126 err = PyList_Append(list, item);
127 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000128 if (err < 0)
129 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000130 i = j = i + n;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000131 splitcount++;
132 if (maxsplit && (splitcount >= maxsplit))
133 break;
Guido van Rossume270b431992-09-03 20:21:07 +0000134 }
135 else
136 i++;
137 }
Barry Warsawf5256011996-12-09 18:35:56 +0000138 item = PyString_FromStringAndSize(s+j, (int)(len-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000139 if (item == NULL)
140 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000141 err = PyList_Append(list, item);
142 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000143 if (err < 0)
144 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000145
146 return list;
Guido van Rossum572d2d91993-11-05 10:14:49 +0000147
148 fail:
Barry Warsawf5256011996-12-09 18:35:56 +0000149 Py_DECREF(list);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000150 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +0000151}
152
153
Guido van Rossum983c9301997-12-29 19:52:29 +0000154static char joinfields__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000155"join(list [,sep]) -> string\n"
156"joinfields(list [,sep]) -> string\n"
157"\n"
158"Return a string composed of the words in list, with\n"
159"intervening occurrences of sep. Sep defaults to a single\n"
160"space.\n"
161"\n"
162"(join and joinfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +0000163
Barry Warsawf5256011996-12-09 18:35:56 +0000164static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000165strop_joinfields(PyObject *self, PyObject *args)
Guido van Rossumc89705d1992-11-26 08:54:07 +0000166{
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000167 PyObject *seq;
168 char *sep = NULL;
169 int seqlen, seplen = 0;
170 int i, reslen = 0, slen = 0, sz = 100;
171 PyObject *res = NULL;
172 char* p = NULL;
173 intargfunc getitemfunc;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000174
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000175 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +0000176 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
Guido van Rossumc89705d1992-11-26 08:54:07 +0000177 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000178 if (sep == NULL) {
179 sep = " ";
180 seplen = 1;
181 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000182
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000183 seqlen = PySequence_Size(seq);
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000184 if (seqlen < 0 && PyErr_Occurred())
185 return NULL;
186
187 if (seqlen == 1) {
188 /* Optimization if there's only one item */
189 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000190 if (item && !PyString_Check(item)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000191 PyErr_SetString(PyExc_TypeError,
192 "first argument must be sequence of strings");
Guido van Rossumbf338301998-10-19 13:38:36 +0000193 Py_DECREF(item);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000194 return NULL;
195 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000196 return item;
197 }
198
199 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
200 return NULL;
201 p = PyString_AsString(res);
202
203 /* optimize for lists, since it's the most common case. all others
204 * (tuples and arbitrary sequences) just use the sequence abstract
205 * interface.
Barry Warsaw04d2d151997-01-03 23:46:51 +0000206 */
207 if (PyList_Check(seq)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000208 for (i = 0; i < seqlen; i++) {
209 PyObject *item = PyList_GET_ITEM(seq, i);
210 if (!PyString_Check(item)) {
211 PyErr_SetString(PyExc_TypeError,
212 "first argument must be sequence of strings");
213 Py_DECREF(res);
214 return NULL;
215 }
216 slen = PyString_GET_SIZE(item);
217 while (reslen + slen + seplen >= sz) {
218 if (_PyString_Resize(&res, sz * 2)) {
219 Py_DECREF(res);
220 return NULL;
221 }
222 sz *= 2;
223 p = PyString_AsString(res) + reslen;
224 }
225 if (i > 0) {
226 memcpy(p, sep, seplen);
227 p += seplen;
228 reslen += seplen;
229 }
230 memcpy(p, PyString_AS_STRING(item), slen);
231 p += slen;
232 reslen += slen;
233 }
234 if (_PyString_Resize(&res, reslen)) {
235 Py_DECREF(res);
236 res = NULL;
237 }
238 return res;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000239 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000240
241 if (seq->ob_type->tp_as_sequence == NULL ||
242 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
243 {
Barry Warsawf5256011996-12-09 18:35:56 +0000244 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000245 "first argument must be a sequence");
Guido van Rossumc89705d1992-11-26 08:54:07 +0000246 return NULL;
247 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000248 /* This is now type safe */
Guido van Rossumc89705d1992-11-26 08:54:07 +0000249 for (i = 0; i < seqlen; i++) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000250 PyObject *item = getitemfunc(seq, i);
251 if (!item || !PyString_Check(item)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000252 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000253 "first argument must be sequence of strings");
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000254 Py_DECREF(res);
255 Py_XDECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000256 return NULL;
257 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000258 slen = PyString_GET_SIZE(item);
259 while (reslen + slen + seplen >= sz) {
260 if (_PyString_Resize(&res, sz * 2)) {
261 Py_DECREF(res);
262 Py_DECREF(item);
263 return NULL;
264 }
265 sz *= 2;
266 p = PyString_AsString(res) + reslen;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000267 }
Guido van Rossumc89705d1992-11-26 08:54:07 +0000268 if (i > 0) {
269 memcpy(p, sep, seplen);
270 p += seplen;
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000271 reslen += seplen;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000272 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000273 memcpy(p, PyString_AS_STRING(item), slen);
274 p += slen;
275 reslen += slen;
276 Py_DECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000277 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000278 if (_PyString_Resize(&res, reslen)) {
279 Py_DECREF(res);
280 res = NULL;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000281 }
282 return res;
283}
284
Guido van Rossum983c9301997-12-29 19:52:29 +0000285
/* Documentation string describing find(). */
static char find__doc__[] =
"find(s, sub [,start [,end]]) -> int\n"
"\n"
"Return the lowest index in s where substring sub is found,\n"
"such that sub is contained within s[start:end]. Optional\n"
"arguments start and end are interpreted as in slice notation.\n"
"\n"
"Return -1 on failure.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000294
Barry Warsawf5256011996-12-09 18:35:56 +0000295static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000296strop_find(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000297{
298 char *s, *sub;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000299 int len, n, i = 0, last = INT_MAX;
Guido van Rossume270b431992-09-03 20:21:07 +0000300
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000301 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +0000302 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000303 return NULL;
304
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000305 if (last > len)
306 last = len;
307 if (last < 0)
308 last += len;
309 if (last < 0)
310 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000311 if (i < 0)
312 i += len;
313 if (i < 0)
Guido van Rossume270b431992-09-03 20:21:07 +0000314 i = 0;
Guido van Rossume270b431992-09-03 20:21:07 +0000315
Guido van Rossum031c6311998-03-24 04:19:22 +0000316 if (n == 0 && i <= last)
Barry Warsawf5256011996-12-09 18:35:56 +0000317 return PyInt_FromLong((long)i);
Guido van Rossume270b431992-09-03 20:21:07 +0000318
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000319 last -= n;
320 for (; i <= last; ++i)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000321 if (s[i] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000322 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000323 return PyInt_FromLong((long)i);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000324
Barry Warsawf5256011996-12-09 18:35:56 +0000325 return PyInt_FromLong(-1L);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000326}
327
328
/* Documentation string describing rfind(). */
static char rfind__doc__[] =
"rfind(s, sub [,start [,end]]) -> int\n"
"\n"
"Return the highest index in s where substring sub is found,\n"
"such that sub is contained within s[start:end]. Optional\n"
"arguments start and end are interpreted as in slice notation.\n"
"\n"
"Return -1 on failure.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000337
Barry Warsawf5256011996-12-09 18:35:56 +0000338static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000339strop_rfind(PyObject *self, PyObject *args)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000340{
341 char *s, *sub;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000342 int len, n, j;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000343 int i = 0, last = INT_MAX;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000344
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000345 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +0000346 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000347 return NULL;
348
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000349 if (last > len)
350 last = len;
351 if (last < 0)
352 last += len;
353 if (last < 0)
354 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000355 if (i < 0)
356 i += len;
357 if (i < 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000358 i = 0;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000359
Guido van Rossum031c6311998-03-24 04:19:22 +0000360 if (n == 0 && i <= last)
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000361 return PyInt_FromLong((long)last);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000362
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000363 for (j = last-n; j >= i; --j)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000364 if (s[j] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000365 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000366 return PyInt_FromLong((long)j);
Guido van Rossume270b431992-09-03 20:21:07 +0000367
Barry Warsawf5256011996-12-09 18:35:56 +0000368 return PyInt_FromLong(-1L);
Guido van Rossume270b431992-09-03 20:21:07 +0000369}
370
Guido van Rossum983c9301997-12-29 19:52:29 +0000371
Barry Warsawf5256011996-12-09 18:35:56 +0000372static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000373do_strip(PyObject *args, int striptype)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000374{
375 char *s;
376 int len, i, j;
377
378
Guido van Rossum7e488981998-10-08 02:25:24 +0000379 if (!PyArg_Parse(args, "t#", &s, &len))
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000380 return NULL;
381
382 i = 0;
383 if (striptype != RIGHTSTRIP) {
384 while (i < len && isspace(Py_CHARMASK(s[i]))) {
385 i++;
386 }
387 }
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000388
389 j = len;
390 if (striptype != LEFTSTRIP) {
391 do {
392 j--;
393 } while (j >= i && isspace(Py_CHARMASK(s[j])));
394 j++;
395 }
396
397 if (i == 0 && j == len) {
Barry Warsawf5256011996-12-09 18:35:56 +0000398 Py_INCREF(args);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000399 return args;
400 }
401 else
Barry Warsawf5256011996-12-09 18:35:56 +0000402 return PyString_FromStringAndSize(s+i, j-i);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000403}
404
Guido van Rossume270b431992-09-03 20:21:07 +0000405
Guido van Rossum983c9301997-12-29 19:52:29 +0000406static char strip__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000407"strip(s) -> string\n"
408"\n"
409"Return a copy of the string s with leading and trailing\n"
410"whitespace removed.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000411
Barry Warsawf5256011996-12-09 18:35:56 +0000412static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000413strop_strip(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000414{
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000415 WARN;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000416 return do_strip(args, BOTHSTRIP);
417}
Guido van Rossume270b431992-09-03 20:21:07 +0000418
Guido van Rossum983c9301997-12-29 19:52:29 +0000419
420static char lstrip__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000421"lstrip(s) -> string\n"
422"\n"
423"Return a copy of the string s with leading whitespace removed.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000424
Barry Warsawf5256011996-12-09 18:35:56 +0000425static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000426strop_lstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000427{
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000428 WARN;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000429 return do_strip(args, LEFTSTRIP);
430}
Guido van Rossume270b431992-09-03 20:21:07 +0000431
Guido van Rossum983c9301997-12-29 19:52:29 +0000432
433static char rstrip__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000434"rstrip(s) -> string\n"
435"\n"
436"Return a copy of the string s with trailing whitespace removed.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000437
Barry Warsawf5256011996-12-09 18:35:56 +0000438static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000439strop_rstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000440{
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000441 WARN;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000442 return do_strip(args, RIGHTSTRIP);
Guido van Rossume270b431992-09-03 20:21:07 +0000443}
444
445
Guido van Rossum983c9301997-12-29 19:52:29 +0000446static char lower__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000447"lower(s) -> string\n"
448"\n"
449"Return a copy of the string s converted to lowercase.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000450
Barry Warsawf5256011996-12-09 18:35:56 +0000451static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000452strop_lower(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000453{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000454 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000455 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000456 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000457 int changed;
458
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000459 WARN;
Guido van Rossum7e488981998-10-08 02:25:24 +0000460 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000461 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000462 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000463 if (new == NULL)
464 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000465 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000466 changed = 0;
467 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000468 int c = Py_CHARMASK(*s++);
Barry Warsaw04d2d151997-01-03 23:46:51 +0000469 if (isupper(c)) {
Guido van Rossum5c850621992-09-11 23:55:51 +0000470 changed = 1;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000471 *s_new = tolower(c);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000472 } else
473 *s_new = c;
474 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000475 }
476 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000477 Py_DECREF(new);
478 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000479 return args;
480 }
481 return new;
482}
483
484
Guido van Rossum983c9301997-12-29 19:52:29 +0000485static char upper__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000486"upper(s) -> string\n"
487"\n"
488"Return a copy of the string s converted to uppercase.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000489
Barry Warsawf5256011996-12-09 18:35:56 +0000490static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000491strop_upper(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000492{
Barry Warsaw04d2d151997-01-03 23:46:51 +0000493 char *s, *s_new;
494 int i, n;
495 PyObject *new;
496 int changed;
497
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000498 WARN;
Guido van Rossum7e488981998-10-08 02:25:24 +0000499 if (!PyArg_Parse(args, "t#", &s, &n))
Barry Warsaw04d2d151997-01-03 23:46:51 +0000500 return NULL;
501 new = PyString_FromStringAndSize(NULL, n);
502 if (new == NULL)
503 return NULL;
504 s_new = PyString_AsString(new);
505 changed = 0;
506 for (i = 0; i < n; i++) {
507 int c = Py_CHARMASK(*s++);
508 if (islower(c)) {
509 changed = 1;
510 *s_new = toupper(c);
511 } else
512 *s_new = c;
513 s_new++;
514 }
515 if (!changed) {
516 Py_DECREF(new);
517 Py_INCREF(args);
518 return args;
519 }
520 return new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000521}
522
523
Guido van Rossum983c9301997-12-29 19:52:29 +0000524static char capitalize__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000525"capitalize(s) -> string\n"
526"\n"
527"Return a copy of the string s with only its first character\n"
528"capitalized.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000529
Barry Warsawf5256011996-12-09 18:35:56 +0000530static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000531strop_capitalize(PyObject *self, PyObject *args)
Guido van Rossum27457531996-06-12 04:24:52 +0000532{
533 char *s, *s_new;
534 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000535 PyObject *new;
Guido van Rossum27457531996-06-12 04:24:52 +0000536 int changed;
537
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000538 WARN;
Guido van Rossum7e488981998-10-08 02:25:24 +0000539 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum27457531996-06-12 04:24:52 +0000540 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000541 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum27457531996-06-12 04:24:52 +0000542 if (new == NULL)
543 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000544 s_new = PyString_AsString(new);
Guido van Rossum27457531996-06-12 04:24:52 +0000545 changed = 0;
Guido van Rossum529c9631996-06-17 16:59:33 +0000546 if (0 < n) {
Guido van Rossum27457531996-06-12 04:24:52 +0000547 int c = Py_CHARMASK(*s++);
548 if (islower(c)) {
549 changed = 1;
550 *s_new = toupper(c);
551 } else
552 *s_new = c;
553 s_new++;
554 }
555 for (i = 1; i < n; i++) {
556 int c = Py_CHARMASK(*s++);
557 if (isupper(c)) {
558 changed = 1;
559 *s_new = tolower(c);
560 } else
561 *s_new = c;
562 s_new++;
563 }
564 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000565 Py_DECREF(new);
566 Py_INCREF(args);
Guido van Rossum27457531996-06-12 04:24:52 +0000567 return args;
568 }
569 return new;
570}
571
572
Guido van Rossum54ec2881999-01-25 22:36:24 +0000573static char expandtabs__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000574"expandtabs(string, [tabsize]) -> string\n"
575"\n"
576"Expand tabs in a string, i.e. replace them by one or more spaces,\n"
577"depending on the current column and the given tab size (default 8).\n"
578"The column number is reset to zero after each newline occurring in the\n"
579"string. This doesn't understand other non-printing characters.";
Guido van Rossum54ec2881999-01-25 22:36:24 +0000580
581static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000582strop_expandtabs(PyObject *self, PyObject *args)
Guido van Rossum54ec2881999-01-25 22:36:24 +0000583{
584 /* Original by Fredrik Lundh */
585 char* e;
586 char* p;
587 char* q;
588 int i, j;
589 PyObject* out;
590 char* string;
591 int stringlen;
592 int tabsize = 8;
593
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000594 WARN;
Guido van Rossum54ec2881999-01-25 22:36:24 +0000595 /* Get arguments */
Guido van Rossum43713e52000-02-29 13:59:29 +0000596 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
Guido van Rossum54ec2881999-01-25 22:36:24 +0000597 return NULL;
598 if (tabsize < 1) {
599 PyErr_SetString(PyExc_ValueError,
600 "tabsize must be at least 1");
601 return NULL;
602 }
603
604 /* First pass: determine size of output string */
605 i = j = 0; /* j: current column; i: total of previous lines */
606 e = string + stringlen;
607 for (p = string; p < e; p++) {
608 if (*p == '\t')
609 j += tabsize - (j%tabsize);
610 else {
611 j++;
612 if (*p == '\n') {
613 i += j;
614 j = 0;
615 }
616 }
617 }
618
619 /* Second pass: create output string and fill it */
620 out = PyString_FromStringAndSize(NULL, i+j);
621 if (out == NULL)
622 return NULL;
623
624 i = 0;
625 q = PyString_AS_STRING(out);
626
627 for (p = string; p < e; p++) {
628 if (*p == '\t') {
629 j = tabsize - (i%tabsize);
630 i += j;
631 while (j-- > 0)
632 *q++ = ' ';
633 } else {
634 *q++ = *p;
635 i++;
636 if (*p == '\n')
637 i = 0;
638 }
639 }
640
641 return out;
642}
643
644
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000645static char count__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000646"count(s, sub[, start[, end]]) -> int\n"
647"\n"
648"Return the number of occurrences of substring sub in string\n"
649"s[start:end]. Optional arguments start and end are\n"
650"interpreted as in slice notation.";
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000651
652static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000653strop_count(PyObject *self, PyObject *args)
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000654{
655 char *s, *sub;
Guido van Rossumc5015831998-10-07 16:36:14 +0000656 int len, n;
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000657 int i = 0, last = INT_MAX;
658 int m, r;
659
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000660 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +0000661 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000662 return NULL;
663 if (last > len)
664 last = len;
665 if (last < 0)
666 last += len;
667 if (last < 0)
668 last = 0;
669 if (i < 0)
670 i += len;
671 if (i < 0)
672 i = 0;
673 m = last + 1 - n;
674 if (n == 0)
675 return PyInt_FromLong((long) (m-i));
676
677 r = 0;
678 while (i < m) {
679 if (!memcmp(s+i, sub, n)) {
680 r++;
681 i += n;
682 } else {
683 i++;
684 }
685 }
686 return PyInt_FromLong((long) r);
687}
688
689
Guido van Rossum983c9301997-12-29 19:52:29 +0000690static char swapcase__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000691"swapcase(s) -> string\n"
692"\n"
693"Return a copy of the string s with upper case characters\n"
694"converted to lowercase and vice versa.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000695
Barry Warsawf5256011996-12-09 18:35:56 +0000696static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000697strop_swapcase(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000698{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000699 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000700 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000701 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000702 int changed;
703
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000704 WARN;
Guido van Rossum7e488981998-10-08 02:25:24 +0000705 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000706 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000707 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000708 if (new == NULL)
709 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000710 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000711 changed = 0;
712 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000713 int c = Py_CHARMASK(*s++);
Guido van Rossum5c850621992-09-11 23:55:51 +0000714 if (islower(c)) {
715 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000716 *s_new = toupper(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000717 }
718 else if (isupper(c)) {
719 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000720 *s_new = tolower(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000721 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000722 else
723 *s_new = c;
724 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000725 }
726 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000727 Py_DECREF(new);
728 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000729 return args;
730 }
731 return new;
732}
733
734
Guido van Rossum983c9301997-12-29 19:52:29 +0000735static char atoi__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000736"atoi(s [,base]) -> int\n"
737"\n"
738"Return the integer represented by the string s in the given\n"
739"base, which defaults to 10. The string s must consist of one\n"
740"or more digits, possibly preceded by a sign. If base is 0, it\n"
741"is chosen from the leading characters of s, 0 for octal, 0x or\n"
742"0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
743"accepted.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000744
Barry Warsawf5256011996-12-09 18:35:56 +0000745static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000746strop_atoi(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000747{
Guido van Rossumb6775db1994-08-01 11:34:53 +0000748 char *s, *end;
749 int base = 10;
750 long x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000751 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000752
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000753 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +0000754 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000755 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000756
757 if ((base != 0 && base < 2) || base > 36) {
758 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
759 return NULL;
760 }
761
Guido van Rossumc35f9331996-09-11 23:30:42 +0000762 while (*s && isspace(Py_CHARMASK(*s)))
763 s++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000764 errno = 0;
765 if (base == 0 && s[0] == '0')
Barry Warsawf5256011996-12-09 18:35:56 +0000766 x = (long) PyOS_strtoul(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000767 else
Barry Warsawf5256011996-12-09 18:35:56 +0000768 x = PyOS_strtol(s, &end, base);
Guido van Rossum5bd69db1999-02-22 16:18:44 +0000769 if (end == s || !isalnum(end[-1]))
Guido van Rossum923fece51998-08-04 15:04:52 +0000770 goto bad;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000771 while (*end && isspace(Py_CHARMASK(*end)))
772 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000773 if (*end != '\0') {
Guido van Rossum923fece51998-08-04 15:04:52 +0000774 bad:
Guido van Rossumc35f9331996-09-11 23:30:42 +0000775 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000776 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000777 return NULL;
778 }
779 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000780 sprintf(buffer, "atoi() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000781 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000782 return NULL;
783 }
Barry Warsawf5256011996-12-09 18:35:56 +0000784 return PyInt_FromLong(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000785}
786
787
Guido van Rossum983c9301997-12-29 19:52:29 +0000788static char atol__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000789"atol(s [,base]) -> long\n"
790"\n"
791"Return the long integer represented by the string s in the\n"
792"given base, which defaults to 10. The string s must consist\n"
793"of one or more digits, possibly preceded by a sign. If base\n"
794"is 0, it is chosen from the leading characters of s, 0 for\n"
795"octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n"
796"0x or 0X is accepted. A trailing L or l is not accepted,\n"
797"unless base is 0.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000798
Barry Warsawf5256011996-12-09 18:35:56 +0000799static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000800strop_atol(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000801{
802 char *s, *end;
803 int base = 10;
Barry Warsawf5256011996-12-09 18:35:56 +0000804 PyObject *x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000805 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000806
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000807 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +0000808 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000809 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000810
811 if ((base != 0 && base < 2) || base > 36) {
812 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
813 return NULL;
814 }
815
Guido van Rossumc35f9331996-09-11 23:30:42 +0000816 while (*s && isspace(Py_CHARMASK(*s)))
817 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000818 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000819 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000820 return NULL;
821 }
Barry Warsawf5256011996-12-09 18:35:56 +0000822 x = PyLong_FromString(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000823 if (x == NULL)
824 return NULL;
825 if (base == 0 && (*end == 'l' || *end == 'L'))
826 end++;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000827 while (*end && isspace(Py_CHARMASK(*end)))
828 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000829 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000830 sprintf(buffer, "invalid literal for atol(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000831 PyErr_SetString(PyExc_ValueError, buffer);
832 Py_DECREF(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000833 return NULL;
834 }
835 return x;
836}
837
838
Guido van Rossum983c9301997-12-29 19:52:29 +0000839static char atof__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000840"atof(s) -> float\n"
841"\n"
842"Return the floating point number represented by the string s.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000843
Barry Warsawf5256011996-12-09 18:35:56 +0000844static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000845strop_atof(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000846{
Tim Petersdbd9ba62000-07-09 03:09:57 +0000847 extern double strtod(const char *, char **);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000848 char *s, *end;
849 double x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000850 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000851
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000852 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +0000853 if (!PyArg_ParseTuple(args, "s:atof", &s))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000854 return NULL;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000855 while (*s && isspace(Py_CHARMASK(*s)))
856 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000857 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000858 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000859 return NULL;
860 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000861 errno = 0;
Guido van Rossum52fa3a61997-02-14 22:59:58 +0000862 PyFPE_START_PROTECT("strop_atof", return 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000863 x = strtod(s, &end);
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000864 PyFPE_END_PROTECT(x)
Guido van Rossumc35f9331996-09-11 23:30:42 +0000865 while (*end && isspace(Py_CHARMASK(*end)))
866 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000867 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000868 sprintf(buffer, "invalid literal for atof(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000869 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000870 return NULL;
871 }
872 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000873 sprintf(buffer, "atof() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000874 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000875 return NULL;
876 }
Barry Warsawf5256011996-12-09 18:35:56 +0000877 return PyFloat_FromDouble(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000878}
879
880
Guido van Rossum983c9301997-12-29 19:52:29 +0000881static char maketrans__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000882"maketrans(frm, to) -> string\n"
883"\n"
884"Return a translation table (a string of 256 bytes long)\n"
885"suitable for use in string.translate. The strings frm and to\n"
886"must be of the same length.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000887
Guido van Rossumed7253c1996-07-23 18:12:39 +0000888static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000889strop_maketrans(PyObject *self, PyObject *args)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000890{
Guido van Rossume0548b81997-01-06 16:50:09 +0000891 unsigned char *c, *from=NULL, *to=NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000892 int i, fromlen=0, tolen=0;
Guido van Rossume0548b81997-01-06 16:50:09 +0000893 PyObject *result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000894
Guido van Rossum43713e52000-02-29 13:59:29 +0000895 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000896 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000897
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000898 if (fromlen != tolen) {
Barry Warsawf5256011996-12-09 18:35:56 +0000899 PyErr_SetString(PyExc_ValueError,
Guido van Rossumed7253c1996-07-23 18:12:39 +0000900 "maketrans arguments must have same length");
901 return NULL;
902 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000903
904 result = PyString_FromStringAndSize((char *)NULL, 256);
905 if (result == NULL)
906 return NULL;
907 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000908 for (i = 0; i < 256; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000909 c[i]=(unsigned char)i;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000910 for (i = 0; i < fromlen; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000911 c[from[i]]=to[i];
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000912
Guido van Rossume0548b81997-01-06 16:50:09 +0000913 return result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000914}
915
916
Guido van Rossum983c9301997-12-29 19:52:29 +0000917static char translate__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000918"translate(s,table [,deletechars]) -> string\n"
919"\n"
920"Return a copy of the string s, where all characters occurring\n"
921"in the optional argument deletechars are removed, and the\n"
922"remaining characters have been mapped through the given\n"
923"translation table, which must be a string of length 256.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000924
Barry Warsawf5256011996-12-09 18:35:56 +0000925static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000926strop_translate(PyObject *self, PyObject *args)
Guido van Rossuma3127e81995-09-13 17:39:06 +0000927{
Guido van Rossume0548b81997-01-06 16:50:09 +0000928 register char *input, *table, *output;
929 register int i, c, changed = 0;
930 PyObject *input_obj;
931 char *table1, *output_start, *del_table=NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000932 int inlen, tablen, dellen = 0;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000933 PyObject *result;
Guido van Rossume0548b81997-01-06 16:50:09 +0000934 int trans_table[256];
Guido van Rossuma3127e81995-09-13 17:39:06 +0000935
Guido van Rossum2e0a6542001-05-15 02:14:44 +0000936 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +0000937 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
Guido van Rossume0548b81997-01-06 16:50:09 +0000938 &table1, &tablen, &del_table, &dellen))
Guido van Rossuma3127e81995-09-13 17:39:06 +0000939 return NULL;
940 if (tablen != 256) {
Barry Warsawf5256011996-12-09 18:35:56 +0000941 PyErr_SetString(PyExc_ValueError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000942 "translation table must be 256 characters long");
Guido van Rossuma3127e81995-09-13 17:39:06 +0000943 return NULL;
944 }
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000945
Guido van Rossume0548b81997-01-06 16:50:09 +0000946 table = table1;
947 inlen = PyString_Size(input_obj);
Guido van Rossumed7253c1996-07-23 18:12:39 +0000948 result = PyString_FromStringAndSize((char *)NULL, inlen);
Guido van Rossuma3127e81995-09-13 17:39:06 +0000949 if (result == NULL)
950 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000951 output_start = output = PyString_AsString(result);
Guido van Rossume0548b81997-01-06 16:50:09 +0000952 input = PyString_AsString(input_obj);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000953
Guido van Rossume0548b81997-01-06 16:50:09 +0000954 if (dellen == 0) {
955 /* If no deletions are required, use faster code */
956 for (i = inlen; --i >= 0; ) {
957 c = Py_CHARMASK(*input++);
958 if (Py_CHARMASK((*output++ = table[c])) != c)
959 changed = 1;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000960 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000961 if (changed)
962 return result;
963 Py_DECREF(result);
964 Py_INCREF(input_obj);
965 return input_obj;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000966 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000967
968 for (i = 0; i < 256; i++)
969 trans_table[i] = Py_CHARMASK(table[i]);
970
Guido van Rossum983c9301997-12-29 19:52:29 +0000971 for (i = 0; i < dellen; i++)
Guido van Rossum1ed5e571997-04-29 21:34:16 +0000972 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Guido van Rossume0548b81997-01-06 16:50:09 +0000973
974 for (i = inlen; --i >= 0; ) {
975 c = Py_CHARMASK(*input++);
976 if (trans_table[c] != -1)
977 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
978 continue;
979 changed = 1;
980 }
981 if (!changed) {
982 Py_DECREF(result);
983 Py_INCREF(input_obj);
984 return input_obj;
985 }
986 /* Fix the size of the resulting string */
987 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
Guido van Rossum983c9301997-12-29 19:52:29 +0000988 return NULL;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000989 return result;
990}
991
992
Guido van Rossum101923b1997-04-02 06:11:18 +0000993/* What follows is used for implementing replace(). Perry Stoll. */
994
/*
  mymemfind

  A strstr() work-alike for arbitrary (not NUL-terminated) memory blocks.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index of the match within MEM, or -1 when
  there is no match.  A pattern longer than MEM can never match, so -1 is
  returned in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot begin within the final pat_len-1 bytes of MEM. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before comparing the remainder. */
        if (mem[pos] != pat[0])
            continue;
        if (pat_len == 1 ||
            memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
            return pos;
    }
    return -1;
}
1022
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match.  For example:
     mem=1111  and pat=11 gives 2
     mem=11111 and pat=11 also gives 2
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* Step over the skipped prefix together with the match itself. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
1046
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are replaced
   with SUB.  At most COUNT occurrences are replaced; a negative COUNT
   means "replace all of them".

   If STR is empty, if PAT is longer than STR, or if there is nothing to
   replace, the original string is returned.  Otherwise a new string is
   allocated here (with PyMem_MALLOC) and returned; the caller must free
   it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.  A result whose length would not fit
       in an int is reported the same way.

   PAT_LEN must be at least 1; the search helper reads pat[0]
   unconditionally.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* len + nfound*delta must not wrap: an overflowed new_len would
       under-allocate the buffer that the memcpy() calls below fill.
       Only growth can overflow; with delta <= 0 the result lies in
       0..len because the nfound matches fit inside STR. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound * delta;

    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    /* The caller recognises this case by out_len == -1 and must not free
       the returned pointer, since it is the caller's own input. */
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1131
1132
Guido van Rossum983c9301997-12-29 19:52:29 +00001133static char replace__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +00001134"replace (str, old, new[, maxsplit]) -> string\n"
1135"\n"
1136"Return a copy of string str with all occurrences of substring\n"
1137"old replaced by new. If the optional argument maxsplit is\n"
1138"given, only the first maxsplit occurrences are replaced.";
Guido van Rossum983c9301997-12-29 19:52:29 +00001139
1140static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +00001141strop_replace(PyObject *self, PyObject *args)
Guido van Rossum101923b1997-04-02 06:11:18 +00001142{
1143 char *str, *pat,*sub,*new_s;
1144 int len,pat_len,sub_len,out_len;
Tim Petersda45d552001-05-10 00:59:45 +00001145 int count = -1;
Guido van Rossum101923b1997-04-02 06:11:18 +00001146 PyObject *new;
1147
Guido van Rossum2e0a6542001-05-15 02:14:44 +00001148 WARN;
Guido van Rossum43713e52000-02-29 13:59:29 +00001149 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
Barry Warsawf577c081997-11-29 00:10:07 +00001150 &str, &len, &pat, &pat_len, &sub, &sub_len,
1151 &count))
Guido van Rossum101923b1997-04-02 06:11:18 +00001152 return NULL;
Guido van Rossum4ccda151998-05-14 02:36:29 +00001153 if (pat_len <= 0) {
1154 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1155 return NULL;
1156 }
Tim Peters1ee77d92001-05-10 01:23:39 +00001157 /* CAUTION: strop treats a replace count of 0 as infinity, unlke
1158 * current (2.1) string.py and string methods. Preserve this for
1159 * ... well, hard to say for what <wink>.
1160 */
1161 if (count == 0)
1162 count = -1;
Barry Warsawf577c081997-11-29 00:10:07 +00001163 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
Guido van Rossum101923b1997-04-02 06:11:18 +00001164 if (new_s == NULL) {
1165 PyErr_NoMemory();
1166 return NULL;
1167 }
1168 if (out_len == -1) {
1169 /* we're returning another reference to the input string */
1170 new = PyTuple_GetItem(args, 0);
1171 Py_XINCREF(new);
1172 }
1173 else {
1174 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001175 PyMem_FREE(new_s);
Guido van Rossum101923b1997-04-02 06:11:18 +00001176 }
1177 return new;
1178}
1179
1180
Guido van Rossume270b431992-09-03 20:21:07 +00001181/* List of functions defined in the module */
1182
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001183static PyMethodDef
1184strop_methods[] = {
Tim Peters0f8b4942001-05-09 22:15:03 +00001185 {"atof", strop_atof, METH_VARARGS, atof__doc__},
1186 {"atoi", strop_atoi, METH_VARARGS, atoi__doc__},
1187 {"atol", strop_atol, METH_VARARGS, atol__doc__},
1188 {"capitalize", strop_capitalize, METH_OLDARGS, capitalize__doc__},
1189 {"count", strop_count, METH_VARARGS, count__doc__},
1190 {"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__},
1191 {"find", strop_find, METH_VARARGS, find__doc__},
1192 {"join", strop_joinfields, METH_VARARGS, joinfields__doc__},
1193 {"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__},
1194 {"lstrip", strop_lstrip, METH_OLDARGS, lstrip__doc__},
1195 {"lower", strop_lower, METH_OLDARGS, lower__doc__},
1196 {"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__},
1197 {"replace", strop_replace, METH_VARARGS, replace__doc__},
1198 {"rfind", strop_rfind, METH_VARARGS, rfind__doc__},
1199 {"rstrip", strop_rstrip, METH_OLDARGS, rstrip__doc__},
1200 {"split", strop_splitfields, METH_VARARGS, splitfields__doc__},
1201 {"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
1202 {"strip", strop_strip, METH_OLDARGS, strip__doc__},
1203 {"swapcase", strop_swapcase, METH_OLDARGS, swapcase__doc__},
1204 {"translate", strop_translate, METH_VARARGS, translate__doc__},
1205 {"upper", strop_upper, METH_OLDARGS, upper__doc__},
Guido van Rossume270b431992-09-03 20:21:07 +00001206 {NULL, NULL} /* sentinel */
1207};
1208
1209
Guido van Rossum3886bb61998-12-04 18:50:17 +00001210DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001211initstrop(void)
Guido van Rossume270b431992-09-03 20:21:07 +00001212{
Barry Warsawf5256011996-12-09 18:35:56 +00001213 PyObject *m, *d, *s;
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001214 char buf[256];
1215 int c, n;
Guido van Rossum983c9301997-12-29 19:52:29 +00001216 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1217 (PyObject*)NULL, PYTHON_API_VERSION);
Barry Warsawf5256011996-12-09 18:35:56 +00001218 d = PyModule_GetDict(m);
Guido van Rossume22e6441993-07-09 10:51:31 +00001219
1220 /* Create 'whitespace' object */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001221 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001222 for (c = 0; c < 256; c++) {
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001223 if (isspace(c))
1224 buf[n++] = c;
1225 }
Barry Warsawf5256011996-12-09 18:35:56 +00001226 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001227 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001228 PyDict_SetItemString(d, "whitespace", s);
1229 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001230 }
1231 /* Create 'lowercase' object */
1232 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001233 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001234 if (islower(c))
1235 buf[n++] = c;
1236 }
Barry Warsawf5256011996-12-09 18:35:56 +00001237 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001238 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001239 PyDict_SetItemString(d, "lowercase", s);
1240 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001241 }
1242
1243 /* Create 'uppercase' object */
1244 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001245 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001246 if (isupper(c))
1247 buf[n++] = c;
1248 }
Barry Warsawf5256011996-12-09 18:35:56 +00001249 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001250 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001251 PyDict_SetItemString(d, "uppercase", s);
1252 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001253 }
Guido van Rossume270b431992-09-03 20:21:07 +00001254}