blob: 203feb942d556f79f8d593582fc1fd7f66f86042 [file] [log] [blame]
Guido van Rossume270b431992-09-03 20:21:07 +00001
2/* strop module */
3
/* Documentation string for the strop module as a whole. */
static char strop_module__doc__[] =
    "Common string manipulations, optimized for speed.\n"
    "\n"
    "Always use \"import string\" rather than referencing\n"
    "this module directly.";
Guido van Rossum983c9301997-12-29 19:52:29 +00009
Barry Warsawf5256011996-12-09 18:35:56 +000010#include "Python.h"
Guido van Rossume270b431992-09-03 20:21:07 +000011
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000012#include <ctype.h>
Guido van Rossume22e6441993-07-09 10:51:31 +000013/* XXX This file assumes that the <ctype.h> is*() functions
14 XXX are defined for all 8-bit characters! */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000015
Guido van Rossum7999a5c1996-08-08 19:16:15 +000016/* The lstrip(), rstrip() and strip() functions are implemented
17 in do_strip(), which uses an additional parameter to indicate what
18 type of strip should occur. */
19
20#define LEFTSTRIP 0
21#define RIGHTSTRIP 1
22#define BOTHSTRIP 2
23
Guido van Rossume270b431992-09-03 20:21:07 +000024
Barry Warsawf5256011996-12-09 18:35:56 +000025static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000026split_whitespace(char *s, int len, int maxsplit)
Guido van Rossum009e79b1995-05-03 17:40:23 +000027{
Barry Warsawe8fc29c1997-01-03 22:45:34 +000028 int i = 0, j, err;
29 int countsplit = 0;
30 PyObject* item;
31 PyObject *list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +000032
Guido van Rossume270b431992-09-03 20:21:07 +000033 if (list == NULL)
34 return NULL;
35
Guido van Rossume270b431992-09-03 20:21:07 +000036 while (i < len) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +000037 while (i < len && isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000038 i = i+1;
39 }
40 j = i;
Guido van Rossumee1813d1995-02-14 00:58:59 +000041 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000042 i = i+1;
43 }
44 if (j < i) {
Barry Warsawf5256011996-12-09 18:35:56 +000045 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000046 if (item == NULL)
47 goto finally;
48
Barry Warsawf5256011996-12-09 18:35:56 +000049 err = PyList_Append(list, item);
50 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000051 if (err < 0)
52 goto finally;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000053
54 countsplit++;
Barry Warsaw93be92d1997-12-02 00:29:30 +000055 while (i < len && isspace(Py_CHARMASK(s[i]))) {
56 i = i+1;
57 }
58 if (maxsplit && (countsplit >= maxsplit) && i < len) {
Barry Warsawf5256011996-12-09 18:35:56 +000059 item = PyString_FromStringAndSize(
60 s+i, (int)(len - i));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000061 if (item == NULL)
62 goto finally;
63
Barry Warsawf5256011996-12-09 18:35:56 +000064 err = PyList_Append(list, item);
65 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000066 if (err < 0)
67 goto finally;
68
Guido van Rossum7999a5c1996-08-08 19:16:15 +000069 i = len;
70 }
Guido van Rossume270b431992-09-03 20:21:07 +000071 }
72 }
Guido van Rossume270b431992-09-03 20:21:07 +000073 return list;
Barry Warsawe8fc29c1997-01-03 22:45:34 +000074 finally:
75 Py_DECREF(list);
76 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +000077}
78
79
/* Documentation string for split() / splitfields(). */
static char splitfields__doc__[] =
    "split(s [,sep [,maxsplit]]) -> list of strings\n"
    "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string s, using sep as the\n"
    "delimiter string. If maxsplit is nonzero, splits into at most\n"
    "maxsplit words. If sep is not specified, any whitespace string\n"
    "is a separator. Maxsplit defaults to 0.\n"
    "\n"
    "(split and splitfields are synonymous)";
90
Barry Warsawf5256011996-12-09 18:35:56 +000091static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000092strop_splitfields(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +000093{
Guido van Rossum572d2d91993-11-05 10:14:49 +000094 int len, n, i, j, err;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000095 int splitcount, maxsplit;
Guido van Rossume270b431992-09-03 20:21:07 +000096 char *s, *sub;
Barry Warsawf5256011996-12-09 18:35:56 +000097 PyObject *list, *item;
Guido van Rossume270b431992-09-03 20:21:07 +000098
Guido van Rossum009e79b1995-05-03 17:40:23 +000099 sub = NULL;
100 n = 0;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000101 splitcount = 0;
102 maxsplit = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000103 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
Guido van Rossume270b431992-09-03 20:21:07 +0000104 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000105 if (sub == NULL)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000106 return split_whitespace(s, len, maxsplit);
Guido van Rossume270b431992-09-03 20:21:07 +0000107 if (n == 0) {
Barry Warsawf5256011996-12-09 18:35:56 +0000108 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossume270b431992-09-03 20:21:07 +0000109 return NULL;
110 }
111
Barry Warsawf5256011996-12-09 18:35:56 +0000112 list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +0000113 if (list == NULL)
114 return NULL;
115
116 i = j = 0;
117 while (i+n <= len) {
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000118 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000119 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000120 if (item == NULL)
121 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000122 err = PyList_Append(list, item);
123 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000124 if (err < 0)
125 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000126 i = j = i + n;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000127 splitcount++;
128 if (maxsplit && (splitcount >= maxsplit))
129 break;
Guido van Rossume270b431992-09-03 20:21:07 +0000130 }
131 else
132 i++;
133 }
Barry Warsawf5256011996-12-09 18:35:56 +0000134 item = PyString_FromStringAndSize(s+j, (int)(len-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000135 if (item == NULL)
136 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000137 err = PyList_Append(list, item);
138 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000139 if (err < 0)
140 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000141
142 return list;
Guido van Rossum572d2d91993-11-05 10:14:49 +0000143
144 fail:
Barry Warsawf5256011996-12-09 18:35:56 +0000145 Py_DECREF(list);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000146 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +0000147}
148
149
/* Documentation string for join() / joinfields(). */
static char joinfields__doc__[] =
    "join(list [,sep]) -> string\n"
    "joinfields(list [,sep]) -> string\n"
    "\n"
    "Return a string composed of the words in list, with\n"
    "intervening occurrences of sep. Sep defaults to a single\n"
    "space.\n"
    "\n"
    "(join and joinfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +0000159
Barry Warsawf5256011996-12-09 18:35:56 +0000160static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000161strop_joinfields(PyObject *self, PyObject *args)
Guido van Rossumc89705d1992-11-26 08:54:07 +0000162{
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000163 PyObject *seq;
164 char *sep = NULL;
165 int seqlen, seplen = 0;
166 int i, reslen = 0, slen = 0, sz = 100;
167 PyObject *res = NULL;
168 char* p = NULL;
169 intargfunc getitemfunc;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000170
Guido van Rossum43713e52000-02-29 13:59:29 +0000171 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
Guido van Rossumc89705d1992-11-26 08:54:07 +0000172 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000173 if (sep == NULL) {
174 sep = " ";
175 seplen = 1;
176 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000177
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000178 seqlen = PySequence_Size(seq);
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000179 if (seqlen < 0 && PyErr_Occurred())
180 return NULL;
181
182 if (seqlen == 1) {
183 /* Optimization if there's only one item */
184 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000185 if (item && !PyString_Check(item)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000186 PyErr_SetString(PyExc_TypeError,
187 "first argument must be sequence of strings");
Guido van Rossumbf338301998-10-19 13:38:36 +0000188 Py_DECREF(item);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000189 return NULL;
190 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000191 return item;
192 }
193
194 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
195 return NULL;
196 p = PyString_AsString(res);
197
198 /* optimize for lists, since it's the most common case. all others
199 * (tuples and arbitrary sequences) just use the sequence abstract
200 * interface.
Barry Warsaw04d2d151997-01-03 23:46:51 +0000201 */
202 if (PyList_Check(seq)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000203 for (i = 0; i < seqlen; i++) {
204 PyObject *item = PyList_GET_ITEM(seq, i);
205 if (!PyString_Check(item)) {
206 PyErr_SetString(PyExc_TypeError,
207 "first argument must be sequence of strings");
208 Py_DECREF(res);
209 return NULL;
210 }
211 slen = PyString_GET_SIZE(item);
212 while (reslen + slen + seplen >= sz) {
213 if (_PyString_Resize(&res, sz * 2)) {
214 Py_DECREF(res);
215 return NULL;
216 }
217 sz *= 2;
218 p = PyString_AsString(res) + reslen;
219 }
220 if (i > 0) {
221 memcpy(p, sep, seplen);
222 p += seplen;
223 reslen += seplen;
224 }
225 memcpy(p, PyString_AS_STRING(item), slen);
226 p += slen;
227 reslen += slen;
228 }
229 if (_PyString_Resize(&res, reslen)) {
230 Py_DECREF(res);
231 res = NULL;
232 }
233 return res;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000234 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000235
236 if (seq->ob_type->tp_as_sequence == NULL ||
237 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
238 {
Barry Warsawf5256011996-12-09 18:35:56 +0000239 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000240 "first argument must be a sequence");
Guido van Rossumc89705d1992-11-26 08:54:07 +0000241 return NULL;
242 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000243 /* This is now type safe */
Guido van Rossumc89705d1992-11-26 08:54:07 +0000244 for (i = 0; i < seqlen; i++) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000245 PyObject *item = getitemfunc(seq, i);
246 if (!item || !PyString_Check(item)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000247 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000248 "first argument must be sequence of strings");
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000249 Py_DECREF(res);
250 Py_XDECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000251 return NULL;
252 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000253 slen = PyString_GET_SIZE(item);
254 while (reslen + slen + seplen >= sz) {
255 if (_PyString_Resize(&res, sz * 2)) {
256 Py_DECREF(res);
257 Py_DECREF(item);
258 return NULL;
259 }
260 sz *= 2;
261 p = PyString_AsString(res) + reslen;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000262 }
Guido van Rossumc89705d1992-11-26 08:54:07 +0000263 if (i > 0) {
264 memcpy(p, sep, seplen);
265 p += seplen;
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000266 reslen += seplen;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000267 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000268 memcpy(p, PyString_AS_STRING(item), slen);
269 p += slen;
270 reslen += slen;
271 Py_DECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000272 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000273 if (_PyString_Resize(&res, reslen)) {
274 Py_DECREF(res);
275 res = NULL;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000276 }
277 return res;
278}
279
Guido van Rossum983c9301997-12-29 19:52:29 +0000280
/* Documentation string for find(). */
static char find__doc__[] =
    "find(s, sub [,start [,end]]) -> int\n"
    "\n"
    "Return the lowest index in s where substring sub is found,\n"
    "such that sub is contained within s[start:end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
289
Barry Warsawf5256011996-12-09 18:35:56 +0000290static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000291strop_find(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000292{
293 char *s, *sub;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000294 int len, n, i = 0, last = INT_MAX;
Guido van Rossume270b431992-09-03 20:21:07 +0000295
Guido van Rossum43713e52000-02-29 13:59:29 +0000296 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000297 return NULL;
298
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000299 if (last > len)
300 last = len;
301 if (last < 0)
302 last += len;
303 if (last < 0)
304 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000305 if (i < 0)
306 i += len;
307 if (i < 0)
Guido van Rossume270b431992-09-03 20:21:07 +0000308 i = 0;
Guido van Rossume270b431992-09-03 20:21:07 +0000309
Guido van Rossum031c6311998-03-24 04:19:22 +0000310 if (n == 0 && i <= last)
Barry Warsawf5256011996-12-09 18:35:56 +0000311 return PyInt_FromLong((long)i);
Guido van Rossume270b431992-09-03 20:21:07 +0000312
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000313 last -= n;
314 for (; i <= last; ++i)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000315 if (s[i] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000316 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000317 return PyInt_FromLong((long)i);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000318
Barry Warsawf5256011996-12-09 18:35:56 +0000319 return PyInt_FromLong(-1L);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000320}
321
322
/* Documentation string for rfind(). */
static char rfind__doc__[] =
    "rfind(s, sub [,start [,end]]) -> int\n"
    "\n"
    "Return the highest index in s where substring sub is found,\n"
    "such that sub is contained within s[start:end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
331
Barry Warsawf5256011996-12-09 18:35:56 +0000332static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000333strop_rfind(PyObject *self, PyObject *args)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000334{
335 char *s, *sub;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000336 int len, n, j;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000337 int i = 0, last = INT_MAX;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000338
Guido van Rossum43713e52000-02-29 13:59:29 +0000339 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000340 return NULL;
341
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000342 if (last > len)
343 last = len;
344 if (last < 0)
345 last += len;
346 if (last < 0)
347 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000348 if (i < 0)
349 i += len;
350 if (i < 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000351 i = 0;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000352
Guido van Rossum031c6311998-03-24 04:19:22 +0000353 if (n == 0 && i <= last)
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000354 return PyInt_FromLong((long)last);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000355
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000356 for (j = last-n; j >= i; --j)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000357 if (s[j] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000358 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000359 return PyInt_FromLong((long)j);
Guido van Rossume270b431992-09-03 20:21:07 +0000360
Barry Warsawf5256011996-12-09 18:35:56 +0000361 return PyInt_FromLong(-1L);
Guido van Rossume270b431992-09-03 20:21:07 +0000362}
363
Guido van Rossum983c9301997-12-29 19:52:29 +0000364
Barry Warsawf5256011996-12-09 18:35:56 +0000365static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000366do_strip(PyObject *args, int striptype)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000367{
368 char *s;
369 int len, i, j;
370
371
Guido van Rossum7e488981998-10-08 02:25:24 +0000372 if (!PyArg_Parse(args, "t#", &s, &len))
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000373 return NULL;
374
375 i = 0;
376 if (striptype != RIGHTSTRIP) {
377 while (i < len && isspace(Py_CHARMASK(s[i]))) {
378 i++;
379 }
380 }
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000381
382 j = len;
383 if (striptype != LEFTSTRIP) {
384 do {
385 j--;
386 } while (j >= i && isspace(Py_CHARMASK(s[j])));
387 j++;
388 }
389
390 if (i == 0 && j == len) {
Barry Warsawf5256011996-12-09 18:35:56 +0000391 Py_INCREF(args);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000392 return args;
393 }
394 else
Barry Warsawf5256011996-12-09 18:35:56 +0000395 return PyString_FromStringAndSize(s+i, j-i);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000396}
397
Guido van Rossume270b431992-09-03 20:21:07 +0000398
Guido van Rossum983c9301997-12-29 19:52:29 +0000399static char strip__doc__[] =
400"strip(s) -> string\n\
401\n\
402Return a copy of the string s with leading and trailing\n\
403whitespace removed.";
404
Barry Warsawf5256011996-12-09 18:35:56 +0000405static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000406strop_strip(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000407{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000408 return do_strip(args, BOTHSTRIP);
409}
Guido van Rossume270b431992-09-03 20:21:07 +0000410
Guido van Rossum983c9301997-12-29 19:52:29 +0000411
412static char lstrip__doc__[] =
413"lstrip(s) -> string\n\
414\n\
415Return a copy of the string s with leading whitespace removed.";
416
Barry Warsawf5256011996-12-09 18:35:56 +0000417static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000418strop_lstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000419{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000420 return do_strip(args, LEFTSTRIP);
421}
Guido van Rossume270b431992-09-03 20:21:07 +0000422
Guido van Rossum983c9301997-12-29 19:52:29 +0000423
424static char rstrip__doc__[] =
425"rstrip(s) -> string\n\
426\n\
427Return a copy of the string s with trailing whitespace removed.";
428
Barry Warsawf5256011996-12-09 18:35:56 +0000429static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000430strop_rstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000431{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000432 return do_strip(args, RIGHTSTRIP);
Guido van Rossume270b431992-09-03 20:21:07 +0000433}
434
435
/* Documentation string for lower(). */
static char lower__doc__[] =
    "lower(s) -> string\n"
    "\n"
    "Return a copy of the string s converted to lowercase.";
440
Barry Warsawf5256011996-12-09 18:35:56 +0000441static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000442strop_lower(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000443{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000444 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000445 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000446 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000447 int changed;
448
Guido van Rossum7e488981998-10-08 02:25:24 +0000449 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000450 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000451 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000452 if (new == NULL)
453 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000454 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000455 changed = 0;
456 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000457 int c = Py_CHARMASK(*s++);
Barry Warsaw04d2d151997-01-03 23:46:51 +0000458 if (isupper(c)) {
Guido van Rossum5c850621992-09-11 23:55:51 +0000459 changed = 1;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000460 *s_new = tolower(c);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000461 } else
462 *s_new = c;
463 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000464 }
465 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000466 Py_DECREF(new);
467 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000468 return args;
469 }
470 return new;
471}
472
473
/* Documentation string for upper(). */
static char upper__doc__[] =
    "upper(s) -> string\n"
    "\n"
    "Return a copy of the string s converted to uppercase.";
478
Barry Warsawf5256011996-12-09 18:35:56 +0000479static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000480strop_upper(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000481{
Barry Warsaw04d2d151997-01-03 23:46:51 +0000482 char *s, *s_new;
483 int i, n;
484 PyObject *new;
485 int changed;
486
Guido van Rossum7e488981998-10-08 02:25:24 +0000487 if (!PyArg_Parse(args, "t#", &s, &n))
Barry Warsaw04d2d151997-01-03 23:46:51 +0000488 return NULL;
489 new = PyString_FromStringAndSize(NULL, n);
490 if (new == NULL)
491 return NULL;
492 s_new = PyString_AsString(new);
493 changed = 0;
494 for (i = 0; i < n; i++) {
495 int c = Py_CHARMASK(*s++);
496 if (islower(c)) {
497 changed = 1;
498 *s_new = toupper(c);
499 } else
500 *s_new = c;
501 s_new++;
502 }
503 if (!changed) {
504 Py_DECREF(new);
505 Py_INCREF(args);
506 return args;
507 }
508 return new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000509}
510
511
/* Documentation string for capitalize(). */
static char capitalize__doc__[] =
    "capitalize(s) -> string\n"
    "\n"
    "Return a copy of the string s with only its first character\n"
    "capitalized.";
517
Barry Warsawf5256011996-12-09 18:35:56 +0000518static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000519strop_capitalize(PyObject *self, PyObject *args)
Guido van Rossum27457531996-06-12 04:24:52 +0000520{
521 char *s, *s_new;
522 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000523 PyObject *new;
Guido van Rossum27457531996-06-12 04:24:52 +0000524 int changed;
525
Guido van Rossum7e488981998-10-08 02:25:24 +0000526 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum27457531996-06-12 04:24:52 +0000527 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000528 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum27457531996-06-12 04:24:52 +0000529 if (new == NULL)
530 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000531 s_new = PyString_AsString(new);
Guido van Rossum27457531996-06-12 04:24:52 +0000532 changed = 0;
Guido van Rossum529c9631996-06-17 16:59:33 +0000533 if (0 < n) {
Guido van Rossum27457531996-06-12 04:24:52 +0000534 int c = Py_CHARMASK(*s++);
535 if (islower(c)) {
536 changed = 1;
537 *s_new = toupper(c);
538 } else
539 *s_new = c;
540 s_new++;
541 }
542 for (i = 1; i < n; i++) {
543 int c = Py_CHARMASK(*s++);
544 if (isupper(c)) {
545 changed = 1;
546 *s_new = tolower(c);
547 } else
548 *s_new = c;
549 s_new++;
550 }
551 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000552 Py_DECREF(new);
553 Py_INCREF(args);
Guido van Rossum27457531996-06-12 04:24:52 +0000554 return args;
555 }
556 return new;
557}
558
559
/* Documentation string for expandtabs(). */
static char expandtabs__doc__[] =
    "expandtabs(string, [tabsize]) -> string\n"
    "\n"
    "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
    "depending on the current column and the given tab size (default 8).\n"
    "The column number is reset to zero after each newline occurring in the\n"
    "string. This doesn't understand other non-printing characters.";
567
568static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000569strop_expandtabs(PyObject *self, PyObject *args)
Guido van Rossum54ec2881999-01-25 22:36:24 +0000570{
571 /* Original by Fredrik Lundh */
572 char* e;
573 char* p;
574 char* q;
575 int i, j;
576 PyObject* out;
577 char* string;
578 int stringlen;
579 int tabsize = 8;
580
581 /* Get arguments */
Guido van Rossum43713e52000-02-29 13:59:29 +0000582 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
Guido van Rossum54ec2881999-01-25 22:36:24 +0000583 return NULL;
584 if (tabsize < 1) {
585 PyErr_SetString(PyExc_ValueError,
586 "tabsize must be at least 1");
587 return NULL;
588 }
589
590 /* First pass: determine size of output string */
591 i = j = 0; /* j: current column; i: total of previous lines */
592 e = string + stringlen;
593 for (p = string; p < e; p++) {
594 if (*p == '\t')
595 j += tabsize - (j%tabsize);
596 else {
597 j++;
598 if (*p == '\n') {
599 i += j;
600 j = 0;
601 }
602 }
603 }
604
605 /* Second pass: create output string and fill it */
606 out = PyString_FromStringAndSize(NULL, i+j);
607 if (out == NULL)
608 return NULL;
609
610 i = 0;
611 q = PyString_AS_STRING(out);
612
613 for (p = string; p < e; p++) {
614 if (*p == '\t') {
615 j = tabsize - (i%tabsize);
616 i += j;
617 while (j-- > 0)
618 *q++ = ' ';
619 } else {
620 *q++ = *p;
621 i++;
622 if (*p == '\n')
623 i = 0;
624 }
625 }
626
627 return out;
628}
629
630
/* Documentation string for count(). */
static char count__doc__[] =
    "count(s, sub[, start[, end]]) -> int\n"
    "\n"
    "Return the number of occurrences of substring sub in string\n"
    "s[start:end]. Optional arguments start and end are\n"
    "interpreted as in slice notation.";
637
638static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000639strop_count(PyObject *self, PyObject *args)
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000640{
641 char *s, *sub;
Guido van Rossumc5015831998-10-07 16:36:14 +0000642 int len, n;
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000643 int i = 0, last = INT_MAX;
644 int m, r;
645
Guido van Rossum43713e52000-02-29 13:59:29 +0000646 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000647 return NULL;
648 if (last > len)
649 last = len;
650 if (last < 0)
651 last += len;
652 if (last < 0)
653 last = 0;
654 if (i < 0)
655 i += len;
656 if (i < 0)
657 i = 0;
658 m = last + 1 - n;
659 if (n == 0)
660 return PyInt_FromLong((long) (m-i));
661
662 r = 0;
663 while (i < m) {
664 if (!memcmp(s+i, sub, n)) {
665 r++;
666 i += n;
667 } else {
668 i++;
669 }
670 }
671 return PyInt_FromLong((long) r);
672}
673
674
/* Documentation string for swapcase(). */
static char swapcase__doc__[] =
    "swapcase(s) -> string\n"
    "\n"
    "Return a copy of the string s with upper case characters\n"
    "converted to lowercase and vice versa.";
680
Barry Warsawf5256011996-12-09 18:35:56 +0000681static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000682strop_swapcase(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000683{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000684 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000685 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000686 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000687 int changed;
688
Guido van Rossum7e488981998-10-08 02:25:24 +0000689 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000690 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000691 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000692 if (new == NULL)
693 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000694 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000695 changed = 0;
696 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000697 int c = Py_CHARMASK(*s++);
Guido van Rossum5c850621992-09-11 23:55:51 +0000698 if (islower(c)) {
699 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000700 *s_new = toupper(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000701 }
702 else if (isupper(c)) {
703 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000704 *s_new = tolower(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000705 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000706 else
707 *s_new = c;
708 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000709 }
710 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000711 Py_DECREF(new);
712 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000713 return args;
714 }
715 return new;
716}
717
718
/* Documentation string for atoi(). */
static char atoi__doc__[] =
    "atoi(s [,base]) -> int\n"
    "\n"
    "Return the integer represented by the string s in the given\n"
    "base, which defaults to 10. The string s must consist of one\n"
    "or more digits, possibly preceded by a sign. If base is 0, it\n"
    "is chosen from the leading characters of s, 0 for octal, 0x or\n"
    "0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
    "accepted.";
728
Barry Warsawf5256011996-12-09 18:35:56 +0000729static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000730strop_atoi(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000731{
Guido van Rossumb6775db1994-08-01 11:34:53 +0000732 char *s, *end;
733 int base = 10;
734 long x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000735 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000736
Guido van Rossum43713e52000-02-29 13:59:29 +0000737 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000738 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000739
740 if ((base != 0 && base < 2) || base > 36) {
741 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
742 return NULL;
743 }
744
Guido van Rossumc35f9331996-09-11 23:30:42 +0000745 while (*s && isspace(Py_CHARMASK(*s)))
746 s++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000747 errno = 0;
748 if (base == 0 && s[0] == '0')
Barry Warsawf5256011996-12-09 18:35:56 +0000749 x = (long) PyOS_strtoul(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000750 else
Barry Warsawf5256011996-12-09 18:35:56 +0000751 x = PyOS_strtol(s, &end, base);
Guido van Rossum5bd69db1999-02-22 16:18:44 +0000752 if (end == s || !isalnum(end[-1]))
Guido van Rossum923fece51998-08-04 15:04:52 +0000753 goto bad;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000754 while (*end && isspace(Py_CHARMASK(*end)))
755 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000756 if (*end != '\0') {
Guido van Rossum923fece51998-08-04 15:04:52 +0000757 bad:
Guido van Rossumc35f9331996-09-11 23:30:42 +0000758 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000759 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000760 return NULL;
761 }
762 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000763 sprintf(buffer, "atoi() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000764 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000765 return NULL;
766 }
Barry Warsawf5256011996-12-09 18:35:56 +0000767 return PyInt_FromLong(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000768}
769
770
Guido van Rossum983c9301997-12-29 19:52:29 +0000771static char atol__doc__[] =
772"atol(s [,base]) -> long\n\
773\n\
774Return the long integer represented by the string s in the\n\
775given base, which defaults to 10. The string s must consist\n\
776of one or more digits, possibly preceded by a sign. If base\n\
777is 0, it is chosen from the leading characters of s, 0 for\n\
778octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
7790x or 0X is accepted. A trailing L or l is not accepted,\n\
780unless base is 0.";
781
Barry Warsawf5256011996-12-09 18:35:56 +0000782static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000783strop_atol(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000784{
785 char *s, *end;
786 int base = 10;
Barry Warsawf5256011996-12-09 18:35:56 +0000787 PyObject *x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000788 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000789
Guido van Rossum43713e52000-02-29 13:59:29 +0000790 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000791 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000792
793 if ((base != 0 && base < 2) || base > 36) {
794 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
795 return NULL;
796 }
797
Guido van Rossumc35f9331996-09-11 23:30:42 +0000798 while (*s && isspace(Py_CHARMASK(*s)))
799 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000800 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000801 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000802 return NULL;
803 }
Barry Warsawf5256011996-12-09 18:35:56 +0000804 x = PyLong_FromString(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000805 if (x == NULL)
806 return NULL;
807 if (base == 0 && (*end == 'l' || *end == 'L'))
808 end++;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000809 while (*end && isspace(Py_CHARMASK(*end)))
810 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000811 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000812 sprintf(buffer, "invalid literal for atol(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000813 PyErr_SetString(PyExc_ValueError, buffer);
814 Py_DECREF(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000815 return NULL;
816 }
817 return x;
818}
819
820
Guido van Rossum983c9301997-12-29 19:52:29 +0000821static char atof__doc__[] =
822"atof(s) -> float\n\
823\n\
824Return the floating point number represented by the string s.";
825
Barry Warsawf5256011996-12-09 18:35:56 +0000826static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000827strop_atof(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000828{
Tim Petersdbd9ba62000-07-09 03:09:57 +0000829 extern double strtod(const char *, char **);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000830 char *s, *end;
831 double x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000832 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000833
Guido van Rossum43713e52000-02-29 13:59:29 +0000834 if (!PyArg_ParseTuple(args, "s:atof", &s))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000835 return NULL;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000836 while (*s && isspace(Py_CHARMASK(*s)))
837 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000838 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000839 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000840 return NULL;
841 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000842 errno = 0;
Guido van Rossum52fa3a61997-02-14 22:59:58 +0000843 PyFPE_START_PROTECT("strop_atof", return 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000844 x = strtod(s, &end);
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000845 PyFPE_END_PROTECT(x)
Guido van Rossumc35f9331996-09-11 23:30:42 +0000846 while (*end && isspace(Py_CHARMASK(*end)))
847 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000848 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000849 sprintf(buffer, "invalid literal for atof(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000850 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000851 return NULL;
852 }
853 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000854 sprintf(buffer, "atof() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000855 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000856 return NULL;
857 }
Barry Warsawf5256011996-12-09 18:35:56 +0000858 return PyFloat_FromDouble(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000859}
860
861
Guido van Rossum983c9301997-12-29 19:52:29 +0000862static char maketrans__doc__[] =
863"maketrans(frm, to) -> string\n\
864\n\
865Return a translation table (a string of 256 bytes long)\n\
866suitable for use in string.translate. The strings frm and to\n\
867must be of the same length.";
868
Guido van Rossumed7253c1996-07-23 18:12:39 +0000869static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000870strop_maketrans(PyObject *self, PyObject *args)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000871{
Guido van Rossume0548b81997-01-06 16:50:09 +0000872 unsigned char *c, *from=NULL, *to=NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000873 int i, fromlen=0, tolen=0;
Guido van Rossume0548b81997-01-06 16:50:09 +0000874 PyObject *result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000875
Guido van Rossum43713e52000-02-29 13:59:29 +0000876 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000877 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000878
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000879 if (fromlen != tolen) {
Barry Warsawf5256011996-12-09 18:35:56 +0000880 PyErr_SetString(PyExc_ValueError,
Guido van Rossumed7253c1996-07-23 18:12:39 +0000881 "maketrans arguments must have same length");
882 return NULL;
883 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000884
885 result = PyString_FromStringAndSize((char *)NULL, 256);
886 if (result == NULL)
887 return NULL;
888 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000889 for (i = 0; i < 256; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000890 c[i]=(unsigned char)i;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000891 for (i = 0; i < fromlen; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000892 c[from[i]]=to[i];
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000893
Guido van Rossume0548b81997-01-06 16:50:09 +0000894 return result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000895}
896
897
Guido van Rossum983c9301997-12-29 19:52:29 +0000898static char translate__doc__[] =
899"translate(s,table [,deletechars]) -> string\n\
900\n\
901Return a copy of the string s, where all characters occurring\n\
902in the optional argument deletechars are removed, and the\n\
903remaining characters have been mapped through the given\n\
904translation table, which must be a string of length 256.";
905
Barry Warsawf5256011996-12-09 18:35:56 +0000906static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000907strop_translate(PyObject *self, PyObject *args)
Guido van Rossuma3127e81995-09-13 17:39:06 +0000908{
Guido van Rossume0548b81997-01-06 16:50:09 +0000909 register char *input, *table, *output;
910 register int i, c, changed = 0;
911 PyObject *input_obj;
912 char *table1, *output_start, *del_table=NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000913 int inlen, tablen, dellen = 0;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000914 PyObject *result;
Guido van Rossume0548b81997-01-06 16:50:09 +0000915 int trans_table[256];
Guido van Rossuma3127e81995-09-13 17:39:06 +0000916
Guido van Rossum43713e52000-02-29 13:59:29 +0000917 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
Guido van Rossume0548b81997-01-06 16:50:09 +0000918 &table1, &tablen, &del_table, &dellen))
Guido van Rossuma3127e81995-09-13 17:39:06 +0000919 return NULL;
920 if (tablen != 256) {
Barry Warsawf5256011996-12-09 18:35:56 +0000921 PyErr_SetString(PyExc_ValueError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000922 "translation table must be 256 characters long");
Guido van Rossuma3127e81995-09-13 17:39:06 +0000923 return NULL;
924 }
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000925
Guido van Rossume0548b81997-01-06 16:50:09 +0000926 table = table1;
927 inlen = PyString_Size(input_obj);
Guido van Rossumed7253c1996-07-23 18:12:39 +0000928 result = PyString_FromStringAndSize((char *)NULL, inlen);
Guido van Rossuma3127e81995-09-13 17:39:06 +0000929 if (result == NULL)
930 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000931 output_start = output = PyString_AsString(result);
Guido van Rossume0548b81997-01-06 16:50:09 +0000932 input = PyString_AsString(input_obj);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000933
Guido van Rossume0548b81997-01-06 16:50:09 +0000934 if (dellen == 0) {
935 /* If no deletions are required, use faster code */
936 for (i = inlen; --i >= 0; ) {
937 c = Py_CHARMASK(*input++);
938 if (Py_CHARMASK((*output++ = table[c])) != c)
939 changed = 1;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000940 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000941 if (changed)
942 return result;
943 Py_DECREF(result);
944 Py_INCREF(input_obj);
945 return input_obj;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000946 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000947
948 for (i = 0; i < 256; i++)
949 trans_table[i] = Py_CHARMASK(table[i]);
950
Guido van Rossum983c9301997-12-29 19:52:29 +0000951 for (i = 0; i < dellen; i++)
Guido van Rossum1ed5e571997-04-29 21:34:16 +0000952 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Guido van Rossume0548b81997-01-06 16:50:09 +0000953
954 for (i = inlen; --i >= 0; ) {
955 c = Py_CHARMASK(*input++);
956 if (trans_table[c] != -1)
957 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
958 continue;
959 changed = 1;
960 }
961 if (!changed) {
962 Py_DECREF(result);
963 Py_INCREF(input_obj);
964 return input_obj;
965 }
966 /* Fix the size of the resulting string */
967 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
Guido van Rossum983c9301997-12-29 19:52:29 +0000968 return NULL;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000969 return result;
970}
971
972
Guido van Rossum101923b1997-04-02 06:11:18 +0000973/* What follows is used for implementing replace(). Perry Stoll. */
974
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM (LEN
  bytes) of the contents of memory pointed to by PAT (PAT_LEN bytes).
  Returns the index into MEM if found, or -1 if not found.  If the
  length of PAT is greater than the length of MEM, the function
  returns -1.

  An empty (or negative-length) pattern is never "found": -1 is
  returned.  Without that guard the loop would read pat[0] and could
  call memcmp() with a length of -1, which converts to a huge size_t.
  Returning -1 rather than 0 also keeps callers that advance by
  PAT_LEN after each hit (see mymemcnt) from looping forever.
*/
static int mymemfind(char *mem, int len, char *pat, int pat_len)
{
	register int ii;

	if (pat_len <= 0)
		return -1;

	/* pattern can not occur in the last pat_len-1 chars */
	len -= pat_len;

	for (ii = 0; ii <= len; ii++) {
		/* cheap first-byte test before paying for memcmp() */
		if (mem[ii] == pat[0] &&
		    (pat_len == 1 ||
		     memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
			return ii;
		}
	}
	return -1;
}
1001
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  left to right and resuming right after each match.  For example
  mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 gives 2 as
  well.
 */
static int mymemcnt(char *mem, int len, char *pat, int pat_len)
{
	int hits = 0;
	int step;

	for (; len >= 0; hits++) {
		step = mymemfind(mem, len, pat, pat_len);
		if (step == -1)
			break;
		/* jump past the match that was just found */
		step += pat_len;
		mem += step;
		len -= step;
	}
	return hits;
}
1024
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  If COUNT is greater than zero, at most COUNT
   occurrences (counted from the left) are replaced.

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR, then the original string is
   returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.  The buffer is not NUL-terminated.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *mymemreplace(char *str, int len, char *pat, int pat_len, char *sub, int sub_len, int count, int *out_len)
{
	/* largest int, spelled out so no extra header is required */
	const int max_int = (int)(~0U >> 1);
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, growth;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count > 0)
		nfound = nfound > count ? count : nfound;
	if (nfound == 0)
		goto return_same;

	/* Only a growing replacement can overflow; refuse it up front
	   instead of allocating a wrapped-around (too small) buffer. */
	growth = sub_len - pat_len;
	if (growth > 0 && nfound > (max_int - len) / growth)
		return NULL;
	new_len = len + nfound*growth;

	/* new_len is 0 when every byte of STR belongs to a match and SUB
	   is empty.  malloc(0) is allowed to return NULL, which the
	   caller would mistake for out-of-memory, so always request at
	   least one byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1095
1096
Guido van Rossum983c9301997-12-29 19:52:29 +00001097static char replace__doc__[] =
1098"replace (str, old, new[, maxsplit]) -> string\n\
1099\n\
1100Return a copy of string str with all occurrences of substring\n\
1101old replaced by new. If the optional argument maxsplit is\n\
1102given, only the first maxsplit occurrences are replaced.";
1103
1104static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +00001105strop_replace(PyObject *self, PyObject *args)
Guido van Rossum101923b1997-04-02 06:11:18 +00001106{
1107 char *str, *pat,*sub,*new_s;
1108 int len,pat_len,sub_len,out_len;
Barry Warsawf577c081997-11-29 00:10:07 +00001109 int count = 0;
Guido van Rossum101923b1997-04-02 06:11:18 +00001110 PyObject *new;
1111
Guido van Rossum43713e52000-02-29 13:59:29 +00001112 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
Barry Warsawf577c081997-11-29 00:10:07 +00001113 &str, &len, &pat, &pat_len, &sub, &sub_len,
1114 &count))
Guido van Rossum101923b1997-04-02 06:11:18 +00001115 return NULL;
Guido van Rossum4ccda151998-05-14 02:36:29 +00001116 if (pat_len <= 0) {
1117 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1118 return NULL;
1119 }
Barry Warsawf577c081997-11-29 00:10:07 +00001120 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
Guido van Rossum101923b1997-04-02 06:11:18 +00001121 if (new_s == NULL) {
1122 PyErr_NoMemory();
1123 return NULL;
1124 }
1125 if (out_len == -1) {
1126 /* we're returning another reference to the input string */
1127 new = PyTuple_GetItem(args, 0);
1128 Py_XINCREF(new);
1129 }
1130 else {
1131 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001132 PyMem_FREE(new_s);
Guido van Rossum101923b1997-04-02 06:11:18 +00001133 }
1134 return new;
1135}
1136
1137
Guido van Rossume270b431992-09-03 20:21:07 +00001138/* List of functions defined in the module */
1139
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001140static PyMethodDef
1141strop_methods[] = {
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001142 {"atof", strop_atof,
1143 METH_VARARGS, atof__doc__},
1144 {"atoi", strop_atoi,
1145 METH_VARARGS, atoi__doc__},
1146 {"atol", strop_atol,
1147 METH_VARARGS, atol__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001148 {"capitalize", strop_capitalize,
1149 METH_OLDARGS, capitalize__doc__},
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001150 {"count", strop_count,
1151 METH_VARARGS, count__doc__},
1152 {"expandtabs", strop_expandtabs,
1153 METH_VARARGS, expandtabs__doc__},
1154 {"find", strop_find,
1155 METH_VARARGS, find__doc__},
1156 {"join", strop_joinfields,
1157 METH_VARARGS, joinfields__doc__},
1158 {"joinfields", strop_joinfields,
1159 METH_VARARGS, joinfields__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001160 {"lstrip", strop_lstrip,
1161 METH_OLDARGS, lstrip__doc__},
1162 {"lower", strop_lower,
1163 METH_OLDARGS, lower__doc__},
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001164 {"maketrans", strop_maketrans,
1165 METH_VARARGS, maketrans__doc__},
1166 {"replace", strop_replace,
1167 METH_VARARGS, replace__doc__},
1168 {"rfind", strop_rfind,
1169 METH_VARARGS, rfind__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001170 {"rstrip", strop_rstrip,
1171 METH_OLDARGS, rstrip__doc__},
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001172 {"split", strop_splitfields,
1173 METH_VARARGS, splitfields__doc__},
1174 {"splitfields", strop_splitfields,
1175 METH_VARARGS, splitfields__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001176 {"strip", strop_strip,
1177 METH_OLDARGS, strip__doc__},
1178 {"swapcase", strop_swapcase,
1179 METH_OLDARGS, swapcase__doc__},
Andrew M. Kuchlinge365fb82000-08-03 02:06:16 +00001180 {"translate", strop_translate,
1181 METH_VARARGS, translate__doc__},
Andrew M. Kuchlinga1abb722000-08-03 02:34:44 +00001182 {"upper", strop_upper,
1183 METH_OLDARGS, upper__doc__},
Guido van Rossume270b431992-09-03 20:21:07 +00001184 {NULL, NULL} /* sentinel */
1185};
1186
1187
Guido van Rossum3886bb61998-12-04 18:50:17 +00001188DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001189initstrop(void)
Guido van Rossume270b431992-09-03 20:21:07 +00001190{
Barry Warsawf5256011996-12-09 18:35:56 +00001191 PyObject *m, *d, *s;
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001192 char buf[256];
1193 int c, n;
Guido van Rossum983c9301997-12-29 19:52:29 +00001194 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1195 (PyObject*)NULL, PYTHON_API_VERSION);
Barry Warsawf5256011996-12-09 18:35:56 +00001196 d = PyModule_GetDict(m);
Guido van Rossume22e6441993-07-09 10:51:31 +00001197
1198 /* Create 'whitespace' object */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001199 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001200 for (c = 0; c < 256; c++) {
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001201 if (isspace(c))
1202 buf[n++] = c;
1203 }
Barry Warsawf5256011996-12-09 18:35:56 +00001204 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001205 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001206 PyDict_SetItemString(d, "whitespace", s);
1207 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001208 }
1209 /* Create 'lowercase' object */
1210 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001211 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001212 if (islower(c))
1213 buf[n++] = c;
1214 }
Barry Warsawf5256011996-12-09 18:35:56 +00001215 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001216 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001217 PyDict_SetItemString(d, "lowercase", s);
1218 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001219 }
1220
1221 /* Create 'uppercase' object */
1222 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001223 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001224 if (isupper(c))
1225 buf[n++] = c;
1226 }
Barry Warsawf5256011996-12-09 18:35:56 +00001227 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001228 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001229 PyDict_SetItemString(d, "uppercase", s);
1230 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001231 }
Guido van Rossume270b431992-09-03 20:21:07 +00001232}