blob: b8f7519f2dfee07933a60e3ce3aa79af9a7e2314 [file] [log] [blame]
Guido van Rossume270b431992-09-03 20:21:07 +00001
/* strop module */
3
/* Documentation text for the module as a whole. */
static char strop_module__doc__[] =
    "Common string manipulations, optimized for speed.\n"
    "\n"
    "Always use \"import string\" rather than referencing\n"
    "this module directly.";
Guido van Rossum983c9301997-12-29 19:52:29 +00009
Barry Warsawf5256011996-12-09 18:35:56 +000010#include "Python.h"
Guido van Rossume270b431992-09-03 20:21:07 +000011
Guido van Rossum7b7c5781997-03-14 04:13:56 +000012#ifdef HAVE_LIMITS_H
13#include <limits.h>
14#else
15#define INT_MAX 2147483647
16#endif
17
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000018#include <ctype.h>
/* XXX This file assumes that the <ctype.h> is*() functions
   XXX are defined for all 8-bit characters! */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000021
/* lstrip(), rstrip() and strip() share a single implementation,
   do_strip(); its extra parameter takes one of the constants below
   to select which end(s) of the string are stripped. */

#define LEFTSTRIP  0    /* strip leading whitespace only */
#define RIGHTSTRIP 1    /* strip trailing whitespace only */
#define BOTHSTRIP  2    /* strip both ends */
29
Guido van Rossume270b431992-09-03 20:21:07 +000030
Barry Warsawf5256011996-12-09 18:35:56 +000031static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000032split_whitespace(char *s, int len, int maxsplit)
Guido van Rossum009e79b1995-05-03 17:40:23 +000033{
Barry Warsawe8fc29c1997-01-03 22:45:34 +000034 int i = 0, j, err;
35 int countsplit = 0;
36 PyObject* item;
37 PyObject *list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +000038
Guido van Rossume270b431992-09-03 20:21:07 +000039 if (list == NULL)
40 return NULL;
41
Guido van Rossume270b431992-09-03 20:21:07 +000042 while (i < len) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +000043 while (i < len && isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000044 i = i+1;
45 }
46 j = i;
Guido van Rossumee1813d1995-02-14 00:58:59 +000047 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000048 i = i+1;
49 }
50 if (j < i) {
Barry Warsawf5256011996-12-09 18:35:56 +000051 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000052 if (item == NULL)
53 goto finally;
54
Barry Warsawf5256011996-12-09 18:35:56 +000055 err = PyList_Append(list, item);
56 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000057 if (err < 0)
58 goto finally;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000059
60 countsplit++;
Barry Warsaw93be92d1997-12-02 00:29:30 +000061 while (i < len && isspace(Py_CHARMASK(s[i]))) {
62 i = i+1;
63 }
64 if (maxsplit && (countsplit >= maxsplit) && i < len) {
Barry Warsawf5256011996-12-09 18:35:56 +000065 item = PyString_FromStringAndSize(
66 s+i, (int)(len - i));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000067 if (item == NULL)
68 goto finally;
69
Barry Warsawf5256011996-12-09 18:35:56 +000070 err = PyList_Append(list, item);
71 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000072 if (err < 0)
73 goto finally;
74
Guido van Rossum7999a5c1996-08-08 19:16:15 +000075 i = len;
76 }
Guido van Rossume270b431992-09-03 20:21:07 +000077 }
78 }
Guido van Rossume270b431992-09-03 20:21:07 +000079 return list;
Barry Warsawe8fc29c1997-01-03 22:45:34 +000080 finally:
81 Py_DECREF(list);
82 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +000083}
84
85
/* Docstring text for split() / splitfields().  The maxsplit sentence
   describes a limit on the number of splits: the implementation appends
   the unsplit remainder as one more item after maxsplit splits. */
static char splitfields__doc__[] =
    "split(s [,sep [,maxsplit]]) -> list of strings\n"
    "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string s, using sep as the\n"
    "delimiter string. If maxsplit is nonzero, at most maxsplit\n"
    "splits are done and the rest of the string is returned as the\n"
    "final list item. If sep is not specified, any whitespace string\n"
    "is a separator. Maxsplit defaults to 0.\n"
    "\n"
    "(split and splitfields are synonymous)";
96
Barry Warsawf5256011996-12-09 18:35:56 +000097static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000098strop_splitfields(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +000099{
Guido van Rossum572d2d91993-11-05 10:14:49 +0000100 int len, n, i, j, err;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000101 int splitcount, maxsplit;
Guido van Rossume270b431992-09-03 20:21:07 +0000102 char *s, *sub;
Barry Warsawf5256011996-12-09 18:35:56 +0000103 PyObject *list, *item;
Guido van Rossume270b431992-09-03 20:21:07 +0000104
Guido van Rossum009e79b1995-05-03 17:40:23 +0000105 sub = NULL;
106 n = 0;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000107 splitcount = 0;
108 maxsplit = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000109 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
Guido van Rossume270b431992-09-03 20:21:07 +0000110 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000111 if (sub == NULL)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000112 return split_whitespace(s, len, maxsplit);
Guido van Rossume270b431992-09-03 20:21:07 +0000113 if (n == 0) {
Barry Warsawf5256011996-12-09 18:35:56 +0000114 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossume270b431992-09-03 20:21:07 +0000115 return NULL;
116 }
117
Barry Warsawf5256011996-12-09 18:35:56 +0000118 list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +0000119 if (list == NULL)
120 return NULL;
121
122 i = j = 0;
123 while (i+n <= len) {
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000124 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000125 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000126 if (item == NULL)
127 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000128 err = PyList_Append(list, item);
129 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000130 if (err < 0)
131 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000132 i = j = i + n;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000133 splitcount++;
134 if (maxsplit && (splitcount >= maxsplit))
135 break;
Guido van Rossume270b431992-09-03 20:21:07 +0000136 }
137 else
138 i++;
139 }
Barry Warsawf5256011996-12-09 18:35:56 +0000140 item = PyString_FromStringAndSize(s+j, (int)(len-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000141 if (item == NULL)
142 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000143 err = PyList_Append(list, item);
144 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000145 if (err < 0)
146 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000147
148 return list;
Guido van Rossum572d2d91993-11-05 10:14:49 +0000149
150 fail:
Barry Warsawf5256011996-12-09 18:35:56 +0000151 Py_DECREF(list);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000152 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +0000153}
154
155
/* Docstring text for join() / joinfields(). */
static char joinfields__doc__[] =
    "join(list [,sep]) -> string\n"
    "joinfields(list [,sep]) -> string\n"
    "\n"
    "Return a string composed of the words in list, with\n"
    "intervening occurrences of sep. Sep defaults to a single\n"
    "space.\n"
    "\n"
    "(join and joinfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +0000165
Barry Warsawf5256011996-12-09 18:35:56 +0000166static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000167strop_joinfields(PyObject *self, PyObject *args)
Guido van Rossumc89705d1992-11-26 08:54:07 +0000168{
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000169 PyObject *seq;
170 char *sep = NULL;
171 int seqlen, seplen = 0;
172 int i, reslen = 0, slen = 0, sz = 100;
173 PyObject *res = NULL;
174 char* p = NULL;
175 intargfunc getitemfunc;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000176
Guido van Rossum43713e52000-02-29 13:59:29 +0000177 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
Guido van Rossumc89705d1992-11-26 08:54:07 +0000178 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000179 if (sep == NULL) {
180 sep = " ";
181 seplen = 1;
182 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000183
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000184 seqlen = PySequence_Size(seq);
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000185 if (seqlen < 0 && PyErr_Occurred())
186 return NULL;
187
188 if (seqlen == 1) {
189 /* Optimization if there's only one item */
190 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000191 if (item && !PyString_Check(item)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000192 PyErr_SetString(PyExc_TypeError,
193 "first argument must be sequence of strings");
Guido van Rossumbf338301998-10-19 13:38:36 +0000194 Py_DECREF(item);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000195 return NULL;
196 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000197 return item;
198 }
199
200 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
201 return NULL;
202 p = PyString_AsString(res);
203
204 /* optimize for lists, since it's the most common case. all others
205 * (tuples and arbitrary sequences) just use the sequence abstract
206 * interface.
Barry Warsaw04d2d151997-01-03 23:46:51 +0000207 */
208 if (PyList_Check(seq)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000209 for (i = 0; i < seqlen; i++) {
210 PyObject *item = PyList_GET_ITEM(seq, i);
211 if (!PyString_Check(item)) {
212 PyErr_SetString(PyExc_TypeError,
213 "first argument must be sequence of strings");
214 Py_DECREF(res);
215 return NULL;
216 }
217 slen = PyString_GET_SIZE(item);
218 while (reslen + slen + seplen >= sz) {
219 if (_PyString_Resize(&res, sz * 2)) {
220 Py_DECREF(res);
221 return NULL;
222 }
223 sz *= 2;
224 p = PyString_AsString(res) + reslen;
225 }
226 if (i > 0) {
227 memcpy(p, sep, seplen);
228 p += seplen;
229 reslen += seplen;
230 }
231 memcpy(p, PyString_AS_STRING(item), slen);
232 p += slen;
233 reslen += slen;
234 }
235 if (_PyString_Resize(&res, reslen)) {
236 Py_DECREF(res);
237 res = NULL;
238 }
239 return res;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000240 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000241
242 if (seq->ob_type->tp_as_sequence == NULL ||
243 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
244 {
Barry Warsawf5256011996-12-09 18:35:56 +0000245 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000246 "first argument must be a sequence");
Guido van Rossumc89705d1992-11-26 08:54:07 +0000247 return NULL;
248 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000249 /* This is now type safe */
Guido van Rossumc89705d1992-11-26 08:54:07 +0000250 for (i = 0; i < seqlen; i++) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000251 PyObject *item = getitemfunc(seq, i);
252 if (!item || !PyString_Check(item)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000253 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000254 "first argument must be sequence of strings");
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000255 Py_DECREF(res);
256 Py_XDECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000257 return NULL;
258 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000259 slen = PyString_GET_SIZE(item);
260 while (reslen + slen + seplen >= sz) {
261 if (_PyString_Resize(&res, sz * 2)) {
262 Py_DECREF(res);
263 Py_DECREF(item);
264 return NULL;
265 }
266 sz *= 2;
267 p = PyString_AsString(res) + reslen;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000268 }
Guido van Rossumc89705d1992-11-26 08:54:07 +0000269 if (i > 0) {
270 memcpy(p, sep, seplen);
271 p += seplen;
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000272 reslen += seplen;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000273 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000274 memcpy(p, PyString_AS_STRING(item), slen);
275 p += slen;
276 reslen += slen;
277 Py_DECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000278 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000279 if (_PyString_Resize(&res, reslen)) {
280 Py_DECREF(res);
281 res = NULL;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000282 }
283 return res;
284}
285
Guido van Rossum983c9301997-12-29 19:52:29 +0000286
/* Docstring text for find().  The return type reads "int" and the
   searched range is written in slice notation, s[start:end]. */
static char find__doc__[] =
    "find(s, sub [,start [,end]]) -> int\n"
    "\n"
    "Return the lowest index in s where substring sub is found,\n"
    "such that sub is contained within s[start:end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
295
Barry Warsawf5256011996-12-09 18:35:56 +0000296static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000297strop_find(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000298{
299 char *s, *sub;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000300 int len, n, i = 0, last = INT_MAX;
Guido van Rossume270b431992-09-03 20:21:07 +0000301
Guido van Rossum43713e52000-02-29 13:59:29 +0000302 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000303 return NULL;
304
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000305 if (last > len)
306 last = len;
307 if (last < 0)
308 last += len;
309 if (last < 0)
310 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000311 if (i < 0)
312 i += len;
313 if (i < 0)
Guido van Rossume270b431992-09-03 20:21:07 +0000314 i = 0;
Guido van Rossume270b431992-09-03 20:21:07 +0000315
Guido van Rossum031c6311998-03-24 04:19:22 +0000316 if (n == 0 && i <= last)
Barry Warsawf5256011996-12-09 18:35:56 +0000317 return PyInt_FromLong((long)i);
Guido van Rossume270b431992-09-03 20:21:07 +0000318
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000319 last -= n;
320 for (; i <= last; ++i)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000321 if (s[i] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000322 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000323 return PyInt_FromLong((long)i);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000324
Barry Warsawf5256011996-12-09 18:35:56 +0000325 return PyInt_FromLong(-1L);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000326}
327
328
/* Docstring text for rfind().  The searched range is written in slice
   notation, s[start:end]. */
static char rfind__doc__[] =
    "rfind(s, sub [,start [,end]]) -> int\n"
    "\n"
    "Return the highest index in s where substring sub is found,\n"
    "such that sub is contained within s[start:end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
337
Barry Warsawf5256011996-12-09 18:35:56 +0000338static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000339strop_rfind(PyObject *self, PyObject *args)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000340{
341 char *s, *sub;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000342 int len, n, j;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000343 int i = 0, last = INT_MAX;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000344
Guido van Rossum43713e52000-02-29 13:59:29 +0000345 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000346 return NULL;
347
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000348 if (last > len)
349 last = len;
350 if (last < 0)
351 last += len;
352 if (last < 0)
353 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000354 if (i < 0)
355 i += len;
356 if (i < 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000357 i = 0;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000358
Guido van Rossum031c6311998-03-24 04:19:22 +0000359 if (n == 0 && i <= last)
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000360 return PyInt_FromLong((long)last);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000361
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000362 for (j = last-n; j >= i; --j)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000363 if (s[j] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000364 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000365 return PyInt_FromLong((long)j);
Guido van Rossume270b431992-09-03 20:21:07 +0000366
Barry Warsawf5256011996-12-09 18:35:56 +0000367 return PyInt_FromLong(-1L);
Guido van Rossume270b431992-09-03 20:21:07 +0000368}
369
Guido van Rossum983c9301997-12-29 19:52:29 +0000370
Barry Warsawf5256011996-12-09 18:35:56 +0000371static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000372do_strip(PyObject *args, int striptype)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000373{
374 char *s;
375 int len, i, j;
376
377
Guido van Rossum7e488981998-10-08 02:25:24 +0000378 if (!PyArg_Parse(args, "t#", &s, &len))
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000379 return NULL;
380
381 i = 0;
382 if (striptype != RIGHTSTRIP) {
383 while (i < len && isspace(Py_CHARMASK(s[i]))) {
384 i++;
385 }
386 }
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000387
388 j = len;
389 if (striptype != LEFTSTRIP) {
390 do {
391 j--;
392 } while (j >= i && isspace(Py_CHARMASK(s[j])));
393 j++;
394 }
395
396 if (i == 0 && j == len) {
Barry Warsawf5256011996-12-09 18:35:56 +0000397 Py_INCREF(args);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000398 return args;
399 }
400 else
Barry Warsawf5256011996-12-09 18:35:56 +0000401 return PyString_FromStringAndSize(s+i, j-i);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000402}
403
Guido van Rossume270b431992-09-03 20:21:07 +0000404
/* Docstring text for strip(). */
static char strip__doc__[] =
    "strip(s) -> string\n"
    "\n"
    "Return a copy of the string s with leading and trailing\n"
    "whitespace removed.";
410
Barry Warsawf5256011996-12-09 18:35:56 +0000411static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000412strop_strip(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000413{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000414 return do_strip(args, BOTHSTRIP);
415}
Guido van Rossume270b431992-09-03 20:21:07 +0000416
Guido van Rossum983c9301997-12-29 19:52:29 +0000417
/* Docstring text for lstrip(). */
static char lstrip__doc__[] =
    "lstrip(s) -> string\n"
    "\n"
    "Return a copy of the string s with leading whitespace removed.";
422
Barry Warsawf5256011996-12-09 18:35:56 +0000423static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000424strop_lstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000425{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000426 return do_strip(args, LEFTSTRIP);
427}
Guido van Rossume270b431992-09-03 20:21:07 +0000428
Guido van Rossum983c9301997-12-29 19:52:29 +0000429
/* Docstring text for rstrip(). */
static char rstrip__doc__[] =
    "rstrip(s) -> string\n"
    "\n"
    "Return a copy of the string s with trailing whitespace removed.";
434
Barry Warsawf5256011996-12-09 18:35:56 +0000435static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000436strop_rstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000437{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000438 return do_strip(args, RIGHTSTRIP);
Guido van Rossume270b431992-09-03 20:21:07 +0000439}
440
441
/* Docstring text for lower(). */
static char lower__doc__[] =
    "lower(s) -> string\n"
    "\n"
    "Return a copy of the string s converted to lowercase.";
446
Barry Warsawf5256011996-12-09 18:35:56 +0000447static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000448strop_lower(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000449{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000450 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000451 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000452 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000453 int changed;
454
Guido van Rossum7e488981998-10-08 02:25:24 +0000455 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000456 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000457 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000458 if (new == NULL)
459 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000460 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000461 changed = 0;
462 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000463 int c = Py_CHARMASK(*s++);
Barry Warsaw04d2d151997-01-03 23:46:51 +0000464 if (isupper(c)) {
Guido van Rossum5c850621992-09-11 23:55:51 +0000465 changed = 1;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000466 *s_new = tolower(c);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000467 } else
468 *s_new = c;
469 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000470 }
471 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000472 Py_DECREF(new);
473 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000474 return args;
475 }
476 return new;
477}
478
479
/* Docstring text for upper(). */
static char upper__doc__[] =
    "upper(s) -> string\n"
    "\n"
    "Return a copy of the string s converted to uppercase.";
484
Barry Warsawf5256011996-12-09 18:35:56 +0000485static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000486strop_upper(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000487{
Barry Warsaw04d2d151997-01-03 23:46:51 +0000488 char *s, *s_new;
489 int i, n;
490 PyObject *new;
491 int changed;
492
Guido van Rossum7e488981998-10-08 02:25:24 +0000493 if (!PyArg_Parse(args, "t#", &s, &n))
Barry Warsaw04d2d151997-01-03 23:46:51 +0000494 return NULL;
495 new = PyString_FromStringAndSize(NULL, n);
496 if (new == NULL)
497 return NULL;
498 s_new = PyString_AsString(new);
499 changed = 0;
500 for (i = 0; i < n; i++) {
501 int c = Py_CHARMASK(*s++);
502 if (islower(c)) {
503 changed = 1;
504 *s_new = toupper(c);
505 } else
506 *s_new = c;
507 s_new++;
508 }
509 if (!changed) {
510 Py_DECREF(new);
511 Py_INCREF(args);
512 return args;
513 }
514 return new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000515}
516
517
/* Docstring text for capitalize(). */
static char capitalize__doc__[] =
    "capitalize(s) -> string\n"
    "\n"
    "Return a copy of the string s with only its first character\n"
    "capitalized.";
523
Barry Warsawf5256011996-12-09 18:35:56 +0000524static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000525strop_capitalize(PyObject *self, PyObject *args)
Guido van Rossum27457531996-06-12 04:24:52 +0000526{
527 char *s, *s_new;
528 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000529 PyObject *new;
Guido van Rossum27457531996-06-12 04:24:52 +0000530 int changed;
531
Guido van Rossum7e488981998-10-08 02:25:24 +0000532 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum27457531996-06-12 04:24:52 +0000533 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000534 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum27457531996-06-12 04:24:52 +0000535 if (new == NULL)
536 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000537 s_new = PyString_AsString(new);
Guido van Rossum27457531996-06-12 04:24:52 +0000538 changed = 0;
Guido van Rossum529c9631996-06-17 16:59:33 +0000539 if (0 < n) {
Guido van Rossum27457531996-06-12 04:24:52 +0000540 int c = Py_CHARMASK(*s++);
541 if (islower(c)) {
542 changed = 1;
543 *s_new = toupper(c);
544 } else
545 *s_new = c;
546 s_new++;
547 }
548 for (i = 1; i < n; i++) {
549 int c = Py_CHARMASK(*s++);
550 if (isupper(c)) {
551 changed = 1;
552 *s_new = tolower(c);
553 } else
554 *s_new = c;
555 s_new++;
556 }
557 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000558 Py_DECREF(new);
559 Py_INCREF(args);
Guido van Rossum27457531996-06-12 04:24:52 +0000560 return args;
561 }
562 return new;
563}
564
565
/* Docstring text for expandtabs(). */
static char expandtabs__doc__[] =
    "expandtabs(string, [tabsize]) -> string\n"
    "\n"
    "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
    "depending on the current column and the given tab size (default 8).\n"
    "The column number is reset to zero after each newline occurring in the\n"
    "string. This doesn't understand other non-printing characters.";
573
574static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000575strop_expandtabs(PyObject *self, PyObject *args)
Guido van Rossum54ec2881999-01-25 22:36:24 +0000576{
577 /* Original by Fredrik Lundh */
578 char* e;
579 char* p;
580 char* q;
581 int i, j;
582 PyObject* out;
583 char* string;
584 int stringlen;
585 int tabsize = 8;
586
587 /* Get arguments */
Guido van Rossum43713e52000-02-29 13:59:29 +0000588 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
Guido van Rossum54ec2881999-01-25 22:36:24 +0000589 return NULL;
590 if (tabsize < 1) {
591 PyErr_SetString(PyExc_ValueError,
592 "tabsize must be at least 1");
593 return NULL;
594 }
595
596 /* First pass: determine size of output string */
597 i = j = 0; /* j: current column; i: total of previous lines */
598 e = string + stringlen;
599 for (p = string; p < e; p++) {
600 if (*p == '\t')
601 j += tabsize - (j%tabsize);
602 else {
603 j++;
604 if (*p == '\n') {
605 i += j;
606 j = 0;
607 }
608 }
609 }
610
611 /* Second pass: create output string and fill it */
612 out = PyString_FromStringAndSize(NULL, i+j);
613 if (out == NULL)
614 return NULL;
615
616 i = 0;
617 q = PyString_AS_STRING(out);
618
619 for (p = string; p < e; p++) {
620 if (*p == '\t') {
621 j = tabsize - (i%tabsize);
622 i += j;
623 while (j-- > 0)
624 *q++ = ' ';
625 } else {
626 *q++ = *p;
627 i++;
628 if (*p == '\n')
629 i = 0;
630 }
631 }
632
633 return out;
634}
635
636
/* Docstring text for count(). */
static char count__doc__[] =
    "count(s, sub[, start[, end]]) -> int\n"
    "\n"
    "Return the number of occurrences of substring sub in string\n"
    "s[start:end]. Optional arguments start and end are\n"
    "interpreted as in slice notation.";
643
644static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000645strop_count(PyObject *self, PyObject *args)
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000646{
647 char *s, *sub;
Guido van Rossumc5015831998-10-07 16:36:14 +0000648 int len, n;
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000649 int i = 0, last = INT_MAX;
650 int m, r;
651
Guido van Rossum43713e52000-02-29 13:59:29 +0000652 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000653 return NULL;
654 if (last > len)
655 last = len;
656 if (last < 0)
657 last += len;
658 if (last < 0)
659 last = 0;
660 if (i < 0)
661 i += len;
662 if (i < 0)
663 i = 0;
664 m = last + 1 - n;
665 if (n == 0)
666 return PyInt_FromLong((long) (m-i));
667
668 r = 0;
669 while (i < m) {
670 if (!memcmp(s+i, sub, n)) {
671 r++;
672 i += n;
673 } else {
674 i++;
675 }
676 }
677 return PyInt_FromLong((long) r);
678}
679
680
/* Docstring text for swapcase(). */
static char swapcase__doc__[] =
    "swapcase(s) -> string\n"
    "\n"
    "Return a copy of the string s with upper case characters\n"
    "converted to lowercase and vice versa.";
686
Barry Warsawf5256011996-12-09 18:35:56 +0000687static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000688strop_swapcase(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000689{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000690 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000691 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000692 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000693 int changed;
694
Guido van Rossum7e488981998-10-08 02:25:24 +0000695 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000696 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000697 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000698 if (new == NULL)
699 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000700 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000701 changed = 0;
702 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000703 int c = Py_CHARMASK(*s++);
Guido van Rossum5c850621992-09-11 23:55:51 +0000704 if (islower(c)) {
705 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000706 *s_new = toupper(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000707 }
708 else if (isupper(c)) {
709 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000710 *s_new = tolower(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000711 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000712 else
713 *s_new = c;
714 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000715 }
716 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000717 Py_DECREF(new);
718 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000719 return args;
720 }
721 return new;
722}
723
724
/* Docstring text for atoi(). */
static char atoi__doc__[] =
    "atoi(s [,base]) -> int\n"
    "\n"
    "Return the integer represented by the string s in the given\n"
    "base, which defaults to 10. The string s must consist of one\n"
    "or more digits, possibly preceded by a sign. If base is 0, it\n"
    "is chosen from the leading characters of s, 0 for octal, 0x or\n"
    "0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
    "accepted.";
734
Barry Warsawf5256011996-12-09 18:35:56 +0000735static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000736strop_atoi(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000737{
Guido van Rossumb6775db1994-08-01 11:34:53 +0000738 char *s, *end;
739 int base = 10;
740 long x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000741 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000742
Guido van Rossum43713e52000-02-29 13:59:29 +0000743 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000744 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000745
746 if ((base != 0 && base < 2) || base > 36) {
747 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
748 return NULL;
749 }
750
Guido van Rossumc35f9331996-09-11 23:30:42 +0000751 while (*s && isspace(Py_CHARMASK(*s)))
752 s++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000753 errno = 0;
754 if (base == 0 && s[0] == '0')
Barry Warsawf5256011996-12-09 18:35:56 +0000755 x = (long) PyOS_strtoul(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000756 else
Barry Warsawf5256011996-12-09 18:35:56 +0000757 x = PyOS_strtol(s, &end, base);
Guido van Rossum5bd69db1999-02-22 16:18:44 +0000758 if (end == s || !isalnum(end[-1]))
Guido van Rossum923fece51998-08-04 15:04:52 +0000759 goto bad;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000760 while (*end && isspace(Py_CHARMASK(*end)))
761 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000762 if (*end != '\0') {
Guido van Rossum923fece51998-08-04 15:04:52 +0000763 bad:
Guido van Rossumc35f9331996-09-11 23:30:42 +0000764 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000765 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000766 return NULL;
767 }
768 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000769 sprintf(buffer, "atoi() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000770 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000771 return NULL;
772 }
Barry Warsawf5256011996-12-09 18:35:56 +0000773 return PyInt_FromLong(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000774}
775
776
Guido van Rossum983c9301997-12-29 19:52:29 +0000777static char atol__doc__[] =
778"atol(s [,base]) -> long\n\
779\n\
780Return the long integer represented by the string s in the\n\
781given base, which defaults to 10. The string s must consist\n\
782of one or more digits, possibly preceded by a sign. If base\n\
783is 0, it is chosen from the leading characters of s, 0 for\n\
784octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
7850x or 0X is accepted. A trailing L or l is not accepted,\n\
786unless base is 0.";
787
Barry Warsawf5256011996-12-09 18:35:56 +0000788static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000789strop_atol(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000790{
791 char *s, *end;
792 int base = 10;
Barry Warsawf5256011996-12-09 18:35:56 +0000793 PyObject *x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000794 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000795
Guido van Rossum43713e52000-02-29 13:59:29 +0000796 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000797 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000798
799 if ((base != 0 && base < 2) || base > 36) {
800 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
801 return NULL;
802 }
803
Guido van Rossumc35f9331996-09-11 23:30:42 +0000804 while (*s && isspace(Py_CHARMASK(*s)))
805 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000806 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000807 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000808 return NULL;
809 }
Barry Warsawf5256011996-12-09 18:35:56 +0000810 x = PyLong_FromString(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000811 if (x == NULL)
812 return NULL;
813 if (base == 0 && (*end == 'l' || *end == 'L'))
814 end++;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000815 while (*end && isspace(Py_CHARMASK(*end)))
816 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000817 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000818 sprintf(buffer, "invalid literal for atol(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000819 PyErr_SetString(PyExc_ValueError, buffer);
820 Py_DECREF(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000821 return NULL;
822 }
823 return x;
824}
825
826
Guido van Rossum983c9301997-12-29 19:52:29 +0000827static char atof__doc__[] =
828"atof(s) -> float\n\
829\n\
830Return the floating point number represented by the string s.";
831
Barry Warsawf5256011996-12-09 18:35:56 +0000832static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000833strop_atof(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000834{
Tim Petersdbd9ba62000-07-09 03:09:57 +0000835 extern double strtod(const char *, char **);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000836 char *s, *end;
837 double x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000838 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000839
Guido van Rossum43713e52000-02-29 13:59:29 +0000840 if (!PyArg_ParseTuple(args, "s:atof", &s))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000841 return NULL;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000842 while (*s && isspace(Py_CHARMASK(*s)))
843 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000844 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000845 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000846 return NULL;
847 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000848 errno = 0;
Guido van Rossum52fa3a61997-02-14 22:59:58 +0000849 PyFPE_START_PROTECT("strop_atof", return 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000850 x = strtod(s, &end);
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000851 PyFPE_END_PROTECT(x)
Guido van Rossumc35f9331996-09-11 23:30:42 +0000852 while (*end && isspace(Py_CHARMASK(*end)))
853 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000854 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000855 sprintf(buffer, "invalid literal for atof(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000856 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000857 return NULL;
858 }
859 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000860 sprintf(buffer, "atof() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000861 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000862 return NULL;
863 }
Barry Warsawf5256011996-12-09 18:35:56 +0000864 return PyFloat_FromDouble(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000865}
866
867
Guido van Rossum983c9301997-12-29 19:52:29 +0000868static char maketrans__doc__[] =
869"maketrans(frm, to) -> string\n\
870\n\
871Return a translation table (a string of 256 bytes long)\n\
872suitable for use in string.translate. The strings frm and to\n\
873must be of the same length.";
874
Guido van Rossumed7253c1996-07-23 18:12:39 +0000875static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000876strop_maketrans(PyObject *self, PyObject *args)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000877{
Guido van Rossume0548b81997-01-06 16:50:09 +0000878 unsigned char *c, *from=NULL, *to=NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000879 int i, fromlen=0, tolen=0;
Guido van Rossume0548b81997-01-06 16:50:09 +0000880 PyObject *result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000881
Guido van Rossum43713e52000-02-29 13:59:29 +0000882 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000883 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000884
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000885 if (fromlen != tolen) {
Barry Warsawf5256011996-12-09 18:35:56 +0000886 PyErr_SetString(PyExc_ValueError,
Guido van Rossumed7253c1996-07-23 18:12:39 +0000887 "maketrans arguments must have same length");
888 return NULL;
889 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000890
891 result = PyString_FromStringAndSize((char *)NULL, 256);
892 if (result == NULL)
893 return NULL;
894 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000895 for (i = 0; i < 256; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000896 c[i]=(unsigned char)i;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000897 for (i = 0; i < fromlen; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000898 c[from[i]]=to[i];
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000899
Guido van Rossume0548b81997-01-06 16:50:09 +0000900 return result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000901}
902
903
Guido van Rossum983c9301997-12-29 19:52:29 +0000904static char translate__doc__[] =
905"translate(s,table [,deletechars]) -> string\n\
906\n\
907Return a copy of the string s, where all characters occurring\n\
908in the optional argument deletechars are removed, and the\n\
909remaining characters have been mapped through the given\n\
910translation table, which must be a string of length 256.";
911
Barry Warsawf5256011996-12-09 18:35:56 +0000912static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000913strop_translate(PyObject *self, PyObject *args)
Guido van Rossuma3127e81995-09-13 17:39:06 +0000914{
Guido van Rossume0548b81997-01-06 16:50:09 +0000915 register char *input, *table, *output;
916 register int i, c, changed = 0;
917 PyObject *input_obj;
918 char *table1, *output_start, *del_table=NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000919 int inlen, tablen, dellen = 0;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000920 PyObject *result;
Guido van Rossume0548b81997-01-06 16:50:09 +0000921 int trans_table[256];
Guido van Rossuma3127e81995-09-13 17:39:06 +0000922
Guido van Rossum43713e52000-02-29 13:59:29 +0000923 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
Guido van Rossume0548b81997-01-06 16:50:09 +0000924 &table1, &tablen, &del_table, &dellen))
Guido van Rossuma3127e81995-09-13 17:39:06 +0000925 return NULL;
926 if (tablen != 256) {
Barry Warsawf5256011996-12-09 18:35:56 +0000927 PyErr_SetString(PyExc_ValueError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000928 "translation table must be 256 characters long");
Guido van Rossuma3127e81995-09-13 17:39:06 +0000929 return NULL;
930 }
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000931
Guido van Rossume0548b81997-01-06 16:50:09 +0000932 table = table1;
933 inlen = PyString_Size(input_obj);
Guido van Rossumed7253c1996-07-23 18:12:39 +0000934 result = PyString_FromStringAndSize((char *)NULL, inlen);
Guido van Rossuma3127e81995-09-13 17:39:06 +0000935 if (result == NULL)
936 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000937 output_start = output = PyString_AsString(result);
Guido van Rossume0548b81997-01-06 16:50:09 +0000938 input = PyString_AsString(input_obj);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000939
Guido van Rossume0548b81997-01-06 16:50:09 +0000940 if (dellen == 0) {
941 /* If no deletions are required, use faster code */
942 for (i = inlen; --i >= 0; ) {
943 c = Py_CHARMASK(*input++);
944 if (Py_CHARMASK((*output++ = table[c])) != c)
945 changed = 1;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000946 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000947 if (changed)
948 return result;
949 Py_DECREF(result);
950 Py_INCREF(input_obj);
951 return input_obj;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000952 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000953
954 for (i = 0; i < 256; i++)
955 trans_table[i] = Py_CHARMASK(table[i]);
956
Guido van Rossum983c9301997-12-29 19:52:29 +0000957 for (i = 0; i < dellen; i++)
Guido van Rossum1ed5e571997-04-29 21:34:16 +0000958 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Guido van Rossume0548b81997-01-06 16:50:09 +0000959
960 for (i = inlen; --i >= 0; ) {
961 c = Py_CHARMASK(*input++);
962 if (trans_table[c] != -1)
963 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
964 continue;
965 changed = 1;
966 }
967 if (!changed) {
968 Py_DECREF(result);
969 Py_INCREF(input_obj);
970 return input_obj;
971 }
972 /* Fix the size of the resulting string */
973 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
Guido van Rossum983c9301997-12-29 19:52:29 +0000974 return NULL;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000975 return result;
976}
977
978
Guido van Rossum101923b1997-04-02 06:11:18 +0000979/* What follows is used for implementing replace(). Perry Stoll. */
980
/*
  mymemfind

  A strstr() work-alike for length-delimited memory blocks.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its index in MEM, or -1 when there is none.
  A pattern longer than MEM can never match, so -1 is returned then too.
*/
static int mymemfind(char *mem, int len, char *pat, int pat_len)
{
	/* highest index at which a full pattern still fits inside mem */
	int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] != pat[0])
			continue;
		if (pat_len == 1)
			return pos;
		if (memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
			return pos;
	}
	return -1;
}
1007
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match.
  For example mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
  gives 2 as well.
 */
static int mymemcnt(char *mem, int len, char *pat, int pat_len)
{
	int hits;
	int at;

	for (hits = 0; len >= 0; hits++) {
		at = mymemfind(mem, len, pat, pat_len);
		if (at == -1)
			break;
		/* continue the search immediately after this match */
		mem += at + pat_len;
		len -= at + pat_len;
	}
	return hits;
}
1030
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  When COUNT is greater than zero at most COUNT
   occurrences (the leftmost ones) are replaced; otherwise all are.

   If length of PAT is greater than length of STR, STR is empty, or there
   are no occurrences of PAT in STR, then the original string is returned.
   Otherwise, a new string is allocated here with PyMem_MALLOC and
   returned; the caller releases it with PyMem_FREE.  The new string is
   NOT NUL-terminated; use *out_len.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *mymemreplace(char *str, int len, char *pat, int pat_len, char *sub, int sub_len, int count, int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count > 0)
		nfound = nfound > count ? count : nfound;
	if (nfound == 0)
		goto return_same;

	/* Growing replacements could push the result length past INT_MAX;
	   refuse instead of computing a wrapped (too small) buffer size.
	   Shrinking replacements cannot overflow: nfound*pat_len <= len. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* The result may legitimately be empty (everything replaced by an
	   empty SUB).  malloc(0) is allowed to return NULL, which the caller
	   would misreport as out-of-memory, so always ask for >= 1 byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found, break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements; a count that
		   started at or below zero never reaches 0 here, which is
		   what makes it mean "replace everything" */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1101
1102
Guido van Rossum983c9301997-12-29 19:52:29 +00001103static char replace__doc__[] =
1104"replace (str, old, new[, maxsplit]) -> string\n\
1105\n\
1106Return a copy of string str with all occurrences of substring\n\
1107old replaced by new. If the optional argument maxsplit is\n\
1108given, only the first maxsplit occurrences are replaced.";
1109
1110static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +00001111strop_replace(PyObject *self, PyObject *args)
Guido van Rossum101923b1997-04-02 06:11:18 +00001112{
1113 char *str, *pat,*sub,*new_s;
1114 int len,pat_len,sub_len,out_len;
Barry Warsawf577c081997-11-29 00:10:07 +00001115 int count = 0;
Guido van Rossum101923b1997-04-02 06:11:18 +00001116 PyObject *new;
1117
Guido van Rossum43713e52000-02-29 13:59:29 +00001118 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
Barry Warsawf577c081997-11-29 00:10:07 +00001119 &str, &len, &pat, &pat_len, &sub, &sub_len,
1120 &count))
Guido van Rossum101923b1997-04-02 06:11:18 +00001121 return NULL;
Guido van Rossum4ccda151998-05-14 02:36:29 +00001122 if (pat_len <= 0) {
1123 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1124 return NULL;
1125 }
Barry Warsawf577c081997-11-29 00:10:07 +00001126 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
Guido van Rossum101923b1997-04-02 06:11:18 +00001127 if (new_s == NULL) {
1128 PyErr_NoMemory();
1129 return NULL;
1130 }
1131 if (out_len == -1) {
1132 /* we're returning another reference to the input string */
1133 new = PyTuple_GetItem(args, 0);
1134 Py_XINCREF(new);
1135 }
1136 else {
1137 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001138 PyMem_FREE(new_s);
Guido van Rossum101923b1997-04-02 06:11:18 +00001139 }
1140 return new;
1141}
1142
1143
Guido van Rossume270b431992-09-03 20:21:07 +00001144/* List of functions defined in the module */
1145
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001146static PyMethodDef
1147strop_methods[] = {
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001148 {"atof", strop_atof,
1149 METH_VARARGS, atof__doc__},
1150 {"atoi", strop_atoi,
1151 METH_VARARGS, atoi__doc__},
1152 {"atol", strop_atol,
1153 METH_VARARGS, atol__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001154 {"capitalize", strop_capitalize,
1155 METH_OLDARGS, capitalize__doc__},
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001156 {"count", strop_count,
1157 METH_VARARGS, count__doc__},
1158 {"expandtabs", strop_expandtabs,
1159 METH_VARARGS, expandtabs__doc__},
1160 {"find", strop_find,
1161 METH_VARARGS, find__doc__},
1162 {"join", strop_joinfields,
1163 METH_VARARGS, joinfields__doc__},
1164 {"joinfields", strop_joinfields,
1165 METH_VARARGS, joinfields__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001166 {"lstrip", strop_lstrip,
1167 METH_OLDARGS, lstrip__doc__},
1168 {"lower", strop_lower,
1169 METH_OLDARGS, lower__doc__},
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001170 {"maketrans", strop_maketrans,
1171 METH_VARARGS, maketrans__doc__},
1172 {"replace", strop_replace,
1173 METH_VARARGS, replace__doc__},
1174 {"rfind", strop_rfind,
1175 METH_VARARGS, rfind__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001176 {"rstrip", strop_rstrip,
1177 METH_OLDARGS, rstrip__doc__},
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001178 {"split", strop_splitfields,
1179 METH_VARARGS, splitfields__doc__},
1180 {"splitfields", strop_splitfields,
1181 METH_VARARGS, splitfields__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001182 {"strip", strop_strip,
1183 METH_OLDARGS, strip__doc__},
1184 {"swapcase", strop_swapcase,
1185 METH_OLDARGS, swapcase__doc__},
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001186 {"translate", strop_translate,
1187 METH_VARARGS, translate__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001188 {"upper", strop_upper,
1189 METH_OLDARGS, upper__doc__},
Guido van Rossume270b431992-09-03 20:21:07 +00001190 {NULL, NULL} /* sentinel */
1191};
1192
1193
Guido van Rossum3886bb61998-12-04 18:50:17 +00001194DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001195initstrop(void)
Guido van Rossume270b431992-09-03 20:21:07 +00001196{
Barry Warsawf5256011996-12-09 18:35:56 +00001197 PyObject *m, *d, *s;
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001198 char buf[256];
1199 int c, n;
Guido van Rossum983c9301997-12-29 19:52:29 +00001200 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1201 (PyObject*)NULL, PYTHON_API_VERSION);
Barry Warsawf5256011996-12-09 18:35:56 +00001202 d = PyModule_GetDict(m);
Guido van Rossume22e6441993-07-09 10:51:31 +00001203
1204 /* Create 'whitespace' object */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001205 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001206 for (c = 0; c < 256; c++) {
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001207 if (isspace(c))
1208 buf[n++] = c;
1209 }
Barry Warsawf5256011996-12-09 18:35:56 +00001210 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001211 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001212 PyDict_SetItemString(d, "whitespace", s);
1213 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001214 }
1215 /* Create 'lowercase' object */
1216 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001217 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001218 if (islower(c))
1219 buf[n++] = c;
1220 }
Barry Warsawf5256011996-12-09 18:35:56 +00001221 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001222 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001223 PyDict_SetItemString(d, "lowercase", s);
1224 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001225 }
1226
1227 /* Create 'uppercase' object */
1228 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001229 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001230 if (isupper(c))
1231 buf[n++] = c;
1232 }
Barry Warsawf5256011996-12-09 18:35:56 +00001233 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001234 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001235 PyDict_SetItemString(d, "uppercase", s);
1236 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001237 }
Guido van Rossume270b431992-09-03 20:21:07 +00001238}