blob: 612d0d3908ee7887da5f40627c0b449b2b13570d [file] [log] [blame]
Guido van Rossume270b431992-09-03 20:21:07 +00001/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00002Copyright (c) 2000, BeOpen.com.
3Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5All rights reserved.
Guido van Rossume270b431992-09-03 20:21:07 +00006
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007See the file "Misc/COPYRIGHT" for information on usage and
8redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossume270b431992-09-03 20:21:07 +00009******************************************************************/
10
11/* strop module */
12
/* Docstring attached to the strop module object itself. */
static char strop_module__doc__[] =
    "Common string manipulations, optimized for speed.\n"
    "\n"
    "Always use \"import string\" rather than referencing\n"
    "this module directly.";
Guido van Rossum983c9301997-12-29 19:52:29 +000018
Barry Warsawf5256011996-12-09 18:35:56 +000019#include "Python.h"
Guido van Rossume270b431992-09-03 20:21:07 +000020
Guido van Rossum7b7c5781997-03-14 04:13:56 +000021#ifdef HAVE_LIMITS_H
22#include <limits.h>
23#else
24#define INT_MAX 2147483647
25#endif
26
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000027#include <ctype.h>
Guido van Rossume22e6441993-07-09 10:51:31 +000028/* XXX This file assumes that the <ctype.h> is*() functions
29 XXX are defined for all 8-bit characters! */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000030
Guido van Rossum7999a5c1996-08-08 19:16:15 +000031/* The lstrip(), rstrip() and strip() functions are implemented
32 in do_strip(), which uses an additional parameter to indicate what
33 type of strip should occur. */
34
35#define LEFTSTRIP 0
36#define RIGHTSTRIP 1
37#define BOTHSTRIP 2
38
Guido van Rossume270b431992-09-03 20:21:07 +000039
Barry Warsawf5256011996-12-09 18:35:56 +000040static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000041split_whitespace(char *s, int len, int maxsplit)
Guido van Rossum009e79b1995-05-03 17:40:23 +000042{
Barry Warsawe8fc29c1997-01-03 22:45:34 +000043 int i = 0, j, err;
44 int countsplit = 0;
45 PyObject* item;
46 PyObject *list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +000047
Guido van Rossume270b431992-09-03 20:21:07 +000048 if (list == NULL)
49 return NULL;
50
Guido van Rossume270b431992-09-03 20:21:07 +000051 while (i < len) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +000052 while (i < len && isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000053 i = i+1;
54 }
55 j = i;
Guido van Rossumee1813d1995-02-14 00:58:59 +000056 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000057 i = i+1;
58 }
59 if (j < i) {
Barry Warsawf5256011996-12-09 18:35:56 +000060 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000061 if (item == NULL)
62 goto finally;
63
Barry Warsawf5256011996-12-09 18:35:56 +000064 err = PyList_Append(list, item);
65 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000066 if (err < 0)
67 goto finally;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000068
69 countsplit++;
Barry Warsaw93be92d1997-12-02 00:29:30 +000070 while (i < len && isspace(Py_CHARMASK(s[i]))) {
71 i = i+1;
72 }
73 if (maxsplit && (countsplit >= maxsplit) && i < len) {
Barry Warsawf5256011996-12-09 18:35:56 +000074 item = PyString_FromStringAndSize(
75 s+i, (int)(len - i));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000076 if (item == NULL)
77 goto finally;
78
Barry Warsawf5256011996-12-09 18:35:56 +000079 err = PyList_Append(list, item);
80 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000081 if (err < 0)
82 goto finally;
83
Guido van Rossum7999a5c1996-08-08 19:16:15 +000084 i = len;
85 }
Guido van Rossume270b431992-09-03 20:21:07 +000086 }
87 }
Guido van Rossume270b431992-09-03 20:21:07 +000088 return list;
Barry Warsawe8fc29c1997-01-03 22:45:34 +000089 finally:
90 Py_DECREF(list);
91 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +000092}
93
94
Guido van Rossum983c9301997-12-29 19:52:29 +000095static char splitfields__doc__[] =
Fred Drakee4f13661999-11-04 19:19:48 +000096"split(s [,sep [,maxsplit]]) -> list of strings\n\
97splitfields(s [,sep [,maxsplit]]) -> list of strings\n\
Guido van Rossum983c9301997-12-29 19:52:29 +000098\n\
99Return a list of the words in the string s, using sep as the\n\
100delimiter string. If maxsplit is nonzero, splits into at most\n\
Fred Drakee4f13661999-11-04 19:19:48 +0000101maxsplit words. If sep is not specified, any whitespace string\n\
Guido van Rossum983c9301997-12-29 19:52:29 +0000102is a separator. Maxsplit defaults to 0.\n\
103\n\
104(split and splitfields are synonymous)";
105
Barry Warsawf5256011996-12-09 18:35:56 +0000106static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000107strop_splitfields(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000108{
Guido van Rossum572d2d91993-11-05 10:14:49 +0000109 int len, n, i, j, err;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000110 int splitcount, maxsplit;
Guido van Rossume270b431992-09-03 20:21:07 +0000111 char *s, *sub;
Barry Warsawf5256011996-12-09 18:35:56 +0000112 PyObject *list, *item;
Guido van Rossume270b431992-09-03 20:21:07 +0000113
Guido van Rossum009e79b1995-05-03 17:40:23 +0000114 sub = NULL;
115 n = 0;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000116 splitcount = 0;
117 maxsplit = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000118 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
Guido van Rossume270b431992-09-03 20:21:07 +0000119 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000120 if (sub == NULL)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000121 return split_whitespace(s, len, maxsplit);
Guido van Rossume270b431992-09-03 20:21:07 +0000122 if (n == 0) {
Barry Warsawf5256011996-12-09 18:35:56 +0000123 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossume270b431992-09-03 20:21:07 +0000124 return NULL;
125 }
126
Barry Warsawf5256011996-12-09 18:35:56 +0000127 list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +0000128 if (list == NULL)
129 return NULL;
130
131 i = j = 0;
132 while (i+n <= len) {
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000133 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000134 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000135 if (item == NULL)
136 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000137 err = PyList_Append(list, item);
138 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000139 if (err < 0)
140 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000141 i = j = i + n;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000142 splitcount++;
143 if (maxsplit && (splitcount >= maxsplit))
144 break;
Guido van Rossume270b431992-09-03 20:21:07 +0000145 }
146 else
147 i++;
148 }
Barry Warsawf5256011996-12-09 18:35:56 +0000149 item = PyString_FromStringAndSize(s+j, (int)(len-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000150 if (item == NULL)
151 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000152 err = PyList_Append(list, item);
153 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000154 if (err < 0)
155 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000156
157 return list;
Guido van Rossum572d2d91993-11-05 10:14:49 +0000158
159 fail:
Barry Warsawf5256011996-12-09 18:35:56 +0000160 Py_DECREF(list);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000161 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +0000162}
163
164
/* Docstring shared by join() and joinfields(). */
static char joinfields__doc__[] =
"join(list [,sep]) -> string\n\
joinfields(list [,sep]) -> string\n\
\n\
Return a string composed of the words in list, with\n\
intervening occurrences of sep. Sep defaults to a single\n\
space.\n\
\n\
(join and joinfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +0000174
Barry Warsawf5256011996-12-09 18:35:56 +0000175static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000176strop_joinfields(PyObject *self, PyObject *args)
Guido van Rossumc89705d1992-11-26 08:54:07 +0000177{
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000178 PyObject *seq;
179 char *sep = NULL;
180 int seqlen, seplen = 0;
181 int i, reslen = 0, slen = 0, sz = 100;
182 PyObject *res = NULL;
183 char* p = NULL;
184 intargfunc getitemfunc;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000185
Guido van Rossum43713e52000-02-29 13:59:29 +0000186 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
Guido van Rossumc89705d1992-11-26 08:54:07 +0000187 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000188 if (sep == NULL) {
189 sep = " ";
190 seplen = 1;
191 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000192
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000193 seqlen = PySequence_Size(seq);
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000194 if (seqlen < 0 && PyErr_Occurred())
195 return NULL;
196
197 if (seqlen == 1) {
198 /* Optimization if there's only one item */
199 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000200 if (item && !PyString_Check(item)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000201 PyErr_SetString(PyExc_TypeError,
202 "first argument must be sequence of strings");
Guido van Rossumbf338301998-10-19 13:38:36 +0000203 Py_DECREF(item);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000204 return NULL;
205 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000206 return item;
207 }
208
209 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
210 return NULL;
211 p = PyString_AsString(res);
212
213 /* optimize for lists, since it's the most common case. all others
214 * (tuples and arbitrary sequences) just use the sequence abstract
215 * interface.
Barry Warsaw04d2d151997-01-03 23:46:51 +0000216 */
217 if (PyList_Check(seq)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000218 for (i = 0; i < seqlen; i++) {
219 PyObject *item = PyList_GET_ITEM(seq, i);
220 if (!PyString_Check(item)) {
221 PyErr_SetString(PyExc_TypeError,
222 "first argument must be sequence of strings");
223 Py_DECREF(res);
224 return NULL;
225 }
226 slen = PyString_GET_SIZE(item);
227 while (reslen + slen + seplen >= sz) {
228 if (_PyString_Resize(&res, sz * 2)) {
229 Py_DECREF(res);
230 return NULL;
231 }
232 sz *= 2;
233 p = PyString_AsString(res) + reslen;
234 }
235 if (i > 0) {
236 memcpy(p, sep, seplen);
237 p += seplen;
238 reslen += seplen;
239 }
240 memcpy(p, PyString_AS_STRING(item), slen);
241 p += slen;
242 reslen += slen;
243 }
244 if (_PyString_Resize(&res, reslen)) {
245 Py_DECREF(res);
246 res = NULL;
247 }
248 return res;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000249 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000250
251 if (seq->ob_type->tp_as_sequence == NULL ||
252 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
253 {
Barry Warsawf5256011996-12-09 18:35:56 +0000254 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000255 "first argument must be a sequence");
Guido van Rossumc89705d1992-11-26 08:54:07 +0000256 return NULL;
257 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000258 /* This is now type safe */
Guido van Rossumc89705d1992-11-26 08:54:07 +0000259 for (i = 0; i < seqlen; i++) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000260 PyObject *item = getitemfunc(seq, i);
261 if (!item || !PyString_Check(item)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000262 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000263 "first argument must be sequence of strings");
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000264 Py_DECREF(res);
265 Py_XDECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000266 return NULL;
267 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000268 slen = PyString_GET_SIZE(item);
269 while (reslen + slen + seplen >= sz) {
270 if (_PyString_Resize(&res, sz * 2)) {
271 Py_DECREF(res);
272 Py_DECREF(item);
273 return NULL;
274 }
275 sz *= 2;
276 p = PyString_AsString(res) + reslen;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000277 }
Guido van Rossumc89705d1992-11-26 08:54:07 +0000278 if (i > 0) {
279 memcpy(p, sep, seplen);
280 p += seplen;
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000281 reslen += seplen;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000282 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000283 memcpy(p, PyString_AS_STRING(item), slen);
284 p += slen;
285 reslen += slen;
286 Py_DECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000287 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000288 if (_PyString_Resize(&res, reslen)) {
289 Py_DECREF(res);
290 res = NULL;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000291 }
292 return res;
293}
294
Guido van Rossum983c9301997-12-29 19:52:29 +0000295
/* Docstring for find(). */
static char find__doc__[] =
"find(s, sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in s where substring sub is found,\n\
such that sub is contained within s[start,end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
304
Barry Warsawf5256011996-12-09 18:35:56 +0000305static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000306strop_find(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000307{
308 char *s, *sub;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000309 int len, n, i = 0, last = INT_MAX;
Guido van Rossume270b431992-09-03 20:21:07 +0000310
Guido van Rossum43713e52000-02-29 13:59:29 +0000311 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000312 return NULL;
313
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000314 if (last > len)
315 last = len;
316 if (last < 0)
317 last += len;
318 if (last < 0)
319 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000320 if (i < 0)
321 i += len;
322 if (i < 0)
Guido van Rossume270b431992-09-03 20:21:07 +0000323 i = 0;
Guido van Rossume270b431992-09-03 20:21:07 +0000324
Guido van Rossum031c6311998-03-24 04:19:22 +0000325 if (n == 0 && i <= last)
Barry Warsawf5256011996-12-09 18:35:56 +0000326 return PyInt_FromLong((long)i);
Guido van Rossume270b431992-09-03 20:21:07 +0000327
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000328 last -= n;
329 for (; i <= last; ++i)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000330 if (s[i] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000331 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000332 return PyInt_FromLong((long)i);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000333
Barry Warsawf5256011996-12-09 18:35:56 +0000334 return PyInt_FromLong(-1L);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000335}
336
337
Guido van Rossum983c9301997-12-29 19:52:29 +0000338static char rfind__doc__[] =
339"rfind(s, sub [,start [,end]]) -> int\n\
340\n\
341Return the highest index in s where substring sub is found,\n\
342such that sub is contained within s[start,end]. Optional\n\
343arguments start and end are interpreted as in slice notation.\n\
344\n\
345Return -1 on failure.";
346
Barry Warsawf5256011996-12-09 18:35:56 +0000347static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000348strop_rfind(PyObject *self, PyObject *args)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000349{
350 char *s, *sub;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000351 int len, n, j;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000352 int i = 0, last = INT_MAX;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000353
Guido van Rossum43713e52000-02-29 13:59:29 +0000354 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000355 return NULL;
356
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000357 if (last > len)
358 last = len;
359 if (last < 0)
360 last += len;
361 if (last < 0)
362 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000363 if (i < 0)
364 i += len;
365 if (i < 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000366 i = 0;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000367
Guido van Rossum031c6311998-03-24 04:19:22 +0000368 if (n == 0 && i <= last)
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000369 return PyInt_FromLong((long)last);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000370
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000371 for (j = last-n; j >= i; --j)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000372 if (s[j] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000373 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000374 return PyInt_FromLong((long)j);
Guido van Rossume270b431992-09-03 20:21:07 +0000375
Barry Warsawf5256011996-12-09 18:35:56 +0000376 return PyInt_FromLong(-1L);
Guido van Rossume270b431992-09-03 20:21:07 +0000377}
378
Guido van Rossum983c9301997-12-29 19:52:29 +0000379
Barry Warsawf5256011996-12-09 18:35:56 +0000380static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000381do_strip(PyObject *args, int striptype)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000382{
383 char *s;
384 int len, i, j;
385
386
Guido van Rossum7e488981998-10-08 02:25:24 +0000387 if (!PyArg_Parse(args, "t#", &s, &len))
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000388 return NULL;
389
390 i = 0;
391 if (striptype != RIGHTSTRIP) {
392 while (i < len && isspace(Py_CHARMASK(s[i]))) {
393 i++;
394 }
395 }
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000396
397 j = len;
398 if (striptype != LEFTSTRIP) {
399 do {
400 j--;
401 } while (j >= i && isspace(Py_CHARMASK(s[j])));
402 j++;
403 }
404
405 if (i == 0 && j == len) {
Barry Warsawf5256011996-12-09 18:35:56 +0000406 Py_INCREF(args);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000407 return args;
408 }
409 else
Barry Warsawf5256011996-12-09 18:35:56 +0000410 return PyString_FromStringAndSize(s+i, j-i);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000411}
412
Guido van Rossume270b431992-09-03 20:21:07 +0000413
Guido van Rossum983c9301997-12-29 19:52:29 +0000414static char strip__doc__[] =
415"strip(s) -> string\n\
416\n\
417Return a copy of the string s with leading and trailing\n\
418whitespace removed.";
419
Barry Warsawf5256011996-12-09 18:35:56 +0000420static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000421strop_strip(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000422{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000423 return do_strip(args, BOTHSTRIP);
424}
Guido van Rossume270b431992-09-03 20:21:07 +0000425
Guido van Rossum983c9301997-12-29 19:52:29 +0000426
427static char lstrip__doc__[] =
428"lstrip(s) -> string\n\
429\n\
430Return a copy of the string s with leading whitespace removed.";
431
Barry Warsawf5256011996-12-09 18:35:56 +0000432static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000433strop_lstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000434{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000435 return do_strip(args, LEFTSTRIP);
436}
Guido van Rossume270b431992-09-03 20:21:07 +0000437
Guido van Rossum983c9301997-12-29 19:52:29 +0000438
439static char rstrip__doc__[] =
440"rstrip(s) -> string\n\
441\n\
442Return a copy of the string s with trailing whitespace removed.";
443
Barry Warsawf5256011996-12-09 18:35:56 +0000444static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000445strop_rstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000446{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000447 return do_strip(args, RIGHTSTRIP);
Guido van Rossume270b431992-09-03 20:21:07 +0000448}
449
450
Guido van Rossum983c9301997-12-29 19:52:29 +0000451static char lower__doc__[] =
452"lower(s) -> string\n\
453\n\
454Return a copy of the string s converted to lowercase.";
455
Barry Warsawf5256011996-12-09 18:35:56 +0000456static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000457strop_lower(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000458{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000459 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000460 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000461 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000462 int changed;
463
Guido van Rossum7e488981998-10-08 02:25:24 +0000464 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000465 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000466 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000467 if (new == NULL)
468 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000469 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000470 changed = 0;
471 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000472 int c = Py_CHARMASK(*s++);
Barry Warsaw04d2d151997-01-03 23:46:51 +0000473 if (isupper(c)) {
Guido van Rossum5c850621992-09-11 23:55:51 +0000474 changed = 1;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000475 *s_new = tolower(c);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000476 } else
477 *s_new = c;
478 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000479 }
480 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000481 Py_DECREF(new);
482 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000483 return args;
484 }
485 return new;
486}
487
488
Guido van Rossum983c9301997-12-29 19:52:29 +0000489static char upper__doc__[] =
490"upper(s) -> string\n\
491\n\
492Return a copy of the string s converted to uppercase.";
493
Barry Warsawf5256011996-12-09 18:35:56 +0000494static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000495strop_upper(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000496{
Barry Warsaw04d2d151997-01-03 23:46:51 +0000497 char *s, *s_new;
498 int i, n;
499 PyObject *new;
500 int changed;
501
Guido van Rossum7e488981998-10-08 02:25:24 +0000502 if (!PyArg_Parse(args, "t#", &s, &n))
Barry Warsaw04d2d151997-01-03 23:46:51 +0000503 return NULL;
504 new = PyString_FromStringAndSize(NULL, n);
505 if (new == NULL)
506 return NULL;
507 s_new = PyString_AsString(new);
508 changed = 0;
509 for (i = 0; i < n; i++) {
510 int c = Py_CHARMASK(*s++);
511 if (islower(c)) {
512 changed = 1;
513 *s_new = toupper(c);
514 } else
515 *s_new = c;
516 s_new++;
517 }
518 if (!changed) {
519 Py_DECREF(new);
520 Py_INCREF(args);
521 return args;
522 }
523 return new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000524}
525
526
Guido van Rossum983c9301997-12-29 19:52:29 +0000527static char capitalize__doc__[] =
528"capitalize(s) -> string\n\
529\n\
530Return a copy of the string s with only its first character\n\
531capitalized.";
532
Barry Warsawf5256011996-12-09 18:35:56 +0000533static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000534strop_capitalize(PyObject *self, PyObject *args)
Guido van Rossum27457531996-06-12 04:24:52 +0000535{
536 char *s, *s_new;
537 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000538 PyObject *new;
Guido van Rossum27457531996-06-12 04:24:52 +0000539 int changed;
540
Guido van Rossum7e488981998-10-08 02:25:24 +0000541 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum27457531996-06-12 04:24:52 +0000542 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000543 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum27457531996-06-12 04:24:52 +0000544 if (new == NULL)
545 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000546 s_new = PyString_AsString(new);
Guido van Rossum27457531996-06-12 04:24:52 +0000547 changed = 0;
Guido van Rossum529c9631996-06-17 16:59:33 +0000548 if (0 < n) {
Guido van Rossum27457531996-06-12 04:24:52 +0000549 int c = Py_CHARMASK(*s++);
550 if (islower(c)) {
551 changed = 1;
552 *s_new = toupper(c);
553 } else
554 *s_new = c;
555 s_new++;
556 }
557 for (i = 1; i < n; i++) {
558 int c = Py_CHARMASK(*s++);
559 if (isupper(c)) {
560 changed = 1;
561 *s_new = tolower(c);
562 } else
563 *s_new = c;
564 s_new++;
565 }
566 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000567 Py_DECREF(new);
568 Py_INCREF(args);
Guido van Rossum27457531996-06-12 04:24:52 +0000569 return args;
570 }
571 return new;
572}
573
574
Guido van Rossum54ec2881999-01-25 22:36:24 +0000575static char expandtabs__doc__[] =
576"expandtabs(string, [tabsize]) -> string\n\
577\n\
Guido van Rossum54ec2881999-01-25 22:36:24 +0000578Expand tabs in a string, i.e. replace them by one or more spaces,\n\
579depending on the current column and the given tab size (default 8).\n\
580The column number is reset to zero after each newline occurring in the\n\
581string. This doesn't understand other non-printing characters.";
582
583static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000584strop_expandtabs(PyObject *self, PyObject *args)
Guido van Rossum54ec2881999-01-25 22:36:24 +0000585{
586 /* Original by Fredrik Lundh */
587 char* e;
588 char* p;
589 char* q;
590 int i, j;
591 PyObject* out;
592 char* string;
593 int stringlen;
594 int tabsize = 8;
595
596 /* Get arguments */
Guido van Rossum43713e52000-02-29 13:59:29 +0000597 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
Guido van Rossum54ec2881999-01-25 22:36:24 +0000598 return NULL;
599 if (tabsize < 1) {
600 PyErr_SetString(PyExc_ValueError,
601 "tabsize must be at least 1");
602 return NULL;
603 }
604
605 /* First pass: determine size of output string */
606 i = j = 0; /* j: current column; i: total of previous lines */
607 e = string + stringlen;
608 for (p = string; p < e; p++) {
609 if (*p == '\t')
610 j += tabsize - (j%tabsize);
611 else {
612 j++;
613 if (*p == '\n') {
614 i += j;
615 j = 0;
616 }
617 }
618 }
619
620 /* Second pass: create output string and fill it */
621 out = PyString_FromStringAndSize(NULL, i+j);
622 if (out == NULL)
623 return NULL;
624
625 i = 0;
626 q = PyString_AS_STRING(out);
627
628 for (p = string; p < e; p++) {
629 if (*p == '\t') {
630 j = tabsize - (i%tabsize);
631 i += j;
632 while (j-- > 0)
633 *q++ = ' ';
634 } else {
635 *q++ = *p;
636 i++;
637 if (*p == '\n')
638 i = 0;
639 }
640 }
641
642 return out;
643}
644
645
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000646static char count__doc__[] =
647"count(s, sub[, start[, end]]) -> int\n\
648\n\
649Return the number of occurrences of substring sub in string\n\
650s[start:end]. Optional arguments start and end are\n\
651interpreted as in slice notation.";
652
653static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000654strop_count(PyObject *self, PyObject *args)
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000655{
656 char *s, *sub;
Guido van Rossumc5015831998-10-07 16:36:14 +0000657 int len, n;
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000658 int i = 0, last = INT_MAX;
659 int m, r;
660
Guido van Rossum43713e52000-02-29 13:59:29 +0000661 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000662 return NULL;
663 if (last > len)
664 last = len;
665 if (last < 0)
666 last += len;
667 if (last < 0)
668 last = 0;
669 if (i < 0)
670 i += len;
671 if (i < 0)
672 i = 0;
673 m = last + 1 - n;
674 if (n == 0)
675 return PyInt_FromLong((long) (m-i));
676
677 r = 0;
678 while (i < m) {
679 if (!memcmp(s+i, sub, n)) {
680 r++;
681 i += n;
682 } else {
683 i++;
684 }
685 }
686 return PyInt_FromLong((long) r);
687}
688
689
Guido van Rossum983c9301997-12-29 19:52:29 +0000690static char swapcase__doc__[] =
691"swapcase(s) -> string\n\
692\n\
693Return a copy of the string s with upper case characters\n\
694converted to lowercase and vice versa.";
695
Barry Warsawf5256011996-12-09 18:35:56 +0000696static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000697strop_swapcase(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000698{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000699 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000700 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000701 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000702 int changed;
703
Guido van Rossum7e488981998-10-08 02:25:24 +0000704 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000705 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000706 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000707 if (new == NULL)
708 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000709 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000710 changed = 0;
711 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000712 int c = Py_CHARMASK(*s++);
Guido van Rossum5c850621992-09-11 23:55:51 +0000713 if (islower(c)) {
714 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000715 *s_new = toupper(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000716 }
717 else if (isupper(c)) {
718 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000719 *s_new = tolower(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000720 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000721 else
722 *s_new = c;
723 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000724 }
725 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000726 Py_DECREF(new);
727 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000728 return args;
729 }
730 return new;
731}
732
733
Guido van Rossum983c9301997-12-29 19:52:29 +0000734static char atoi__doc__[] =
735"atoi(s [,base]) -> int\n\
736\n\
737Return the integer represented by the string s in the given\n\
738base, which defaults to 10. The string s must consist of one\n\
739or more digits, possibly preceded by a sign. If base is 0, it\n\
740is chosen from the leading characters of s, 0 for octal, 0x or\n\
7410X for hexadecimal. If base is 16, a preceding 0x or 0X is\n\
742accepted.";
743
Barry Warsawf5256011996-12-09 18:35:56 +0000744static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000745strop_atoi(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000746{
Guido van Rossumb6775db1994-08-01 11:34:53 +0000747 char *s, *end;
748 int base = 10;
749 long x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000750 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000751
Guido van Rossum43713e52000-02-29 13:59:29 +0000752 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000753 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000754
755 if ((base != 0 && base < 2) || base > 36) {
756 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
757 return NULL;
758 }
759
Guido van Rossumc35f9331996-09-11 23:30:42 +0000760 while (*s && isspace(Py_CHARMASK(*s)))
761 s++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000762 errno = 0;
763 if (base == 0 && s[0] == '0')
Barry Warsawf5256011996-12-09 18:35:56 +0000764 x = (long) PyOS_strtoul(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000765 else
Barry Warsawf5256011996-12-09 18:35:56 +0000766 x = PyOS_strtol(s, &end, base);
Guido van Rossum5bd69db1999-02-22 16:18:44 +0000767 if (end == s || !isalnum(end[-1]))
Guido van Rossum923fece51998-08-04 15:04:52 +0000768 goto bad;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000769 while (*end && isspace(Py_CHARMASK(*end)))
770 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000771 if (*end != '\0') {
Guido van Rossum923fece51998-08-04 15:04:52 +0000772 bad:
Guido van Rossumc35f9331996-09-11 23:30:42 +0000773 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000774 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000775 return NULL;
776 }
777 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000778 sprintf(buffer, "atoi() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000779 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000780 return NULL;
781 }
Barry Warsawf5256011996-12-09 18:35:56 +0000782 return PyInt_FromLong(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000783}
784
785
Guido van Rossum983c9301997-12-29 19:52:29 +0000786static char atol__doc__[] =
787"atol(s [,base]) -> long\n\
788\n\
789Return the long integer represented by the string s in the\n\
790given base, which defaults to 10. The string s must consist\n\
791of one or more digits, possibly preceded by a sign. If base\n\
792is 0, it is chosen from the leading characters of s, 0 for\n\
793octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
7940x or 0X is accepted. A trailing L or l is not accepted,\n\
795unless base is 0.";
796
Barry Warsawf5256011996-12-09 18:35:56 +0000797static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000798strop_atol(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000799{
800 char *s, *end;
801 int base = 10;
Barry Warsawf5256011996-12-09 18:35:56 +0000802 PyObject *x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000803 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000804
Guido van Rossum43713e52000-02-29 13:59:29 +0000805 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000806 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000807
808 if ((base != 0 && base < 2) || base > 36) {
809 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
810 return NULL;
811 }
812
Guido van Rossumc35f9331996-09-11 23:30:42 +0000813 while (*s && isspace(Py_CHARMASK(*s)))
814 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000815 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000816 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000817 return NULL;
818 }
Barry Warsawf5256011996-12-09 18:35:56 +0000819 x = PyLong_FromString(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000820 if (x == NULL)
821 return NULL;
822 if (base == 0 && (*end == 'l' || *end == 'L'))
823 end++;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000824 while (*end && isspace(Py_CHARMASK(*end)))
825 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000826 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000827 sprintf(buffer, "invalid literal for atol(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000828 PyErr_SetString(PyExc_ValueError, buffer);
829 Py_DECREF(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000830 return NULL;
831 }
832 return x;
833}
834
835
Guido van Rossum983c9301997-12-29 19:52:29 +0000836static char atof__doc__[] =
837"atof(s) -> float\n\
838\n\
839Return the floating point number represented by the string s.";
840
Barry Warsawf5256011996-12-09 18:35:56 +0000841static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000842strop_atof(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000843{
Tim Petersdbd9ba62000-07-09 03:09:57 +0000844 extern double strtod(const char *, char **);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000845 char *s, *end;
846 double x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000847 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000848
Guido van Rossum43713e52000-02-29 13:59:29 +0000849 if (!PyArg_ParseTuple(args, "s:atof", &s))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000850 return NULL;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000851 while (*s && isspace(Py_CHARMASK(*s)))
852 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000853 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000854 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000855 return NULL;
856 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000857 errno = 0;
Guido van Rossum52fa3a61997-02-14 22:59:58 +0000858 PyFPE_START_PROTECT("strop_atof", return 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000859 x = strtod(s, &end);
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000860 PyFPE_END_PROTECT(x)
Guido van Rossumc35f9331996-09-11 23:30:42 +0000861 while (*end && isspace(Py_CHARMASK(*end)))
862 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000863 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000864 sprintf(buffer, "invalid literal for atof(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000865 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000866 return NULL;
867 }
868 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000869 sprintf(buffer, "atof() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000870 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000871 return NULL;
872 }
Barry Warsawf5256011996-12-09 18:35:56 +0000873 return PyFloat_FromDouble(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000874}
875
876
Guido van Rossum983c9301997-12-29 19:52:29 +0000877static char maketrans__doc__[] =
878"maketrans(frm, to) -> string\n\
879\n\
880Return a translation table (a string of 256 bytes long)\n\
881suitable for use in string.translate. The strings frm and to\n\
882must be of the same length.";
883
Guido van Rossumed7253c1996-07-23 18:12:39 +0000884static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000885strop_maketrans(PyObject *self, PyObject *args)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000886{
Guido van Rossume0548b81997-01-06 16:50:09 +0000887 unsigned char *c, *from=NULL, *to=NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000888 int i, fromlen=0, tolen=0;
Guido van Rossume0548b81997-01-06 16:50:09 +0000889 PyObject *result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000890
Guido van Rossum43713e52000-02-29 13:59:29 +0000891 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000892 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000893
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000894 if (fromlen != tolen) {
Barry Warsawf5256011996-12-09 18:35:56 +0000895 PyErr_SetString(PyExc_ValueError,
Guido van Rossumed7253c1996-07-23 18:12:39 +0000896 "maketrans arguments must have same length");
897 return NULL;
898 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000899
900 result = PyString_FromStringAndSize((char *)NULL, 256);
901 if (result == NULL)
902 return NULL;
903 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000904 for (i = 0; i < 256; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000905 c[i]=(unsigned char)i;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000906 for (i = 0; i < fromlen; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000907 c[from[i]]=to[i];
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000908
Guido van Rossume0548b81997-01-06 16:50:09 +0000909 return result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000910}
911
912
Guido van Rossum983c9301997-12-29 19:52:29 +0000913static char translate__doc__[] =
914"translate(s,table [,deletechars]) -> string\n\
915\n\
916Return a copy of the string s, where all characters occurring\n\
917in the optional argument deletechars are removed, and the\n\
918remaining characters have been mapped through the given\n\
919translation table, which must be a string of length 256.";
920
Barry Warsawf5256011996-12-09 18:35:56 +0000921static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000922strop_translate(PyObject *self, PyObject *args)
Guido van Rossuma3127e81995-09-13 17:39:06 +0000923{
Guido van Rossume0548b81997-01-06 16:50:09 +0000924 register char *input, *table, *output;
925 register int i, c, changed = 0;
926 PyObject *input_obj;
927 char *table1, *output_start, *del_table=NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000928 int inlen, tablen, dellen = 0;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000929 PyObject *result;
Guido van Rossume0548b81997-01-06 16:50:09 +0000930 int trans_table[256];
Guido van Rossuma3127e81995-09-13 17:39:06 +0000931
Guido van Rossum43713e52000-02-29 13:59:29 +0000932 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
Guido van Rossume0548b81997-01-06 16:50:09 +0000933 &table1, &tablen, &del_table, &dellen))
Guido van Rossuma3127e81995-09-13 17:39:06 +0000934 return NULL;
935 if (tablen != 256) {
Barry Warsawf5256011996-12-09 18:35:56 +0000936 PyErr_SetString(PyExc_ValueError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000937 "translation table must be 256 characters long");
Guido van Rossuma3127e81995-09-13 17:39:06 +0000938 return NULL;
939 }
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000940
Guido van Rossume0548b81997-01-06 16:50:09 +0000941 table = table1;
942 inlen = PyString_Size(input_obj);
Guido van Rossumed7253c1996-07-23 18:12:39 +0000943 result = PyString_FromStringAndSize((char *)NULL, inlen);
Guido van Rossuma3127e81995-09-13 17:39:06 +0000944 if (result == NULL)
945 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000946 output_start = output = PyString_AsString(result);
Guido van Rossume0548b81997-01-06 16:50:09 +0000947 input = PyString_AsString(input_obj);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000948
Guido van Rossume0548b81997-01-06 16:50:09 +0000949 if (dellen == 0) {
950 /* If no deletions are required, use faster code */
951 for (i = inlen; --i >= 0; ) {
952 c = Py_CHARMASK(*input++);
953 if (Py_CHARMASK((*output++ = table[c])) != c)
954 changed = 1;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000955 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000956 if (changed)
957 return result;
958 Py_DECREF(result);
959 Py_INCREF(input_obj);
960 return input_obj;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000961 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000962
963 for (i = 0; i < 256; i++)
964 trans_table[i] = Py_CHARMASK(table[i]);
965
Guido van Rossum983c9301997-12-29 19:52:29 +0000966 for (i = 0; i < dellen; i++)
Guido van Rossum1ed5e571997-04-29 21:34:16 +0000967 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Guido van Rossume0548b81997-01-06 16:50:09 +0000968
969 for (i = inlen; --i >= 0; ) {
970 c = Py_CHARMASK(*input++);
971 if (trans_table[c] != -1)
972 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
973 continue;
974 changed = 1;
975 }
976 if (!changed) {
977 Py_DECREF(result);
978 Py_INCREF(input_obj);
979 return input_obj;
980 }
981 /* Fix the size of the resulting string */
982 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
Guido van Rossum983c9301997-12-29 19:52:29 +0000983 return NULL;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000984 return result;
985}
986
987
Guido van Rossum101923b1997-04-02 06:11:18 +0000988/* What follows is used for implementing replace(). Perry Stoll. */
989
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first copy of the PAT_LEN bytes
  at PAT.  Returns the index into MEM where that copy starts, or -1 if
  there is none.  A pattern longer than the memory block always yields
  -1.
*/
static int mymemfind(char *mem, int len, char *pat, int pat_len)
{
	/* last index at which a complete copy of PAT could still begin */
	int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* cheap first-byte test before comparing the remainder */
		if (mem[pos] != pat[0])
			continue;
		if (pat_len == 1)
			return pos;
		if (memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
			return pos;
	}
	return -1;
}
1016
/*
  mymemcnt

  Count the non-overlapping copies of PAT in MEM, scanning left to
  right and resuming right after each match:
     mem=1111  and pat==11 gives 2,
     mem=11111 and pat==11 also gives 2.
 */
static int mymemcnt(char *mem, int len, char *pat, int pat_len)
{
	int hits;
	int pos;

	for (hits = 0; len >= 0; hits++) {
		pos = mymemfind(mem, len, pat, pat_len);
		if (pos == -1)
			break;
		/* continue immediately behind the match just found */
		mem += pos + pat_len;
		len -= pos + pat_len;
	}
	return hits;
}
1039
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  When COUNT is greater than zero, at most COUNT
   occurrences (counted from the left) are replaced.

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there are no occurrences of PAT in STR, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *mymemreplace(char *str, int len, char *pat, int pat_len, char *sub, int sub_len, int count, int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count > 0)
		nfound = nfound > count ? count : nfound;
	if (nfound == 0)
		goto return_same;

	/* A growing replacement must not overflow the int length. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* The result can be empty (e.g. every byte of STR is removed).
	   A zero-byte allocation may return NULL, which the caller would
	   take for an out-of-memory error, so always ask for at least one
	   byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements; a COUNT that
		   started at zero or below never reaches zero here, so
		   every occurrence is replaced */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1110
1111
Guido van Rossum983c9301997-12-29 19:52:29 +00001112static char replace__doc__[] =
1113"replace (str, old, new[, maxsplit]) -> string\n\
1114\n\
1115Return a copy of string str with all occurrences of substring\n\
1116old replaced by new. If the optional argument maxsplit is\n\
1117given, only the first maxsplit occurrences are replaced.";
1118
1119static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +00001120strop_replace(PyObject *self, PyObject *args)
Guido van Rossum101923b1997-04-02 06:11:18 +00001121{
1122 char *str, *pat,*sub,*new_s;
1123 int len,pat_len,sub_len,out_len;
Barry Warsawf577c081997-11-29 00:10:07 +00001124 int count = 0;
Guido van Rossum101923b1997-04-02 06:11:18 +00001125 PyObject *new;
1126
Guido van Rossum43713e52000-02-29 13:59:29 +00001127 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
Barry Warsawf577c081997-11-29 00:10:07 +00001128 &str, &len, &pat, &pat_len, &sub, &sub_len,
1129 &count))
Guido van Rossum101923b1997-04-02 06:11:18 +00001130 return NULL;
Guido van Rossum4ccda151998-05-14 02:36:29 +00001131 if (pat_len <= 0) {
1132 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1133 return NULL;
1134 }
Barry Warsawf577c081997-11-29 00:10:07 +00001135 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
Guido van Rossum101923b1997-04-02 06:11:18 +00001136 if (new_s == NULL) {
1137 PyErr_NoMemory();
1138 return NULL;
1139 }
1140 if (out_len == -1) {
1141 /* we're returning another reference to the input string */
1142 new = PyTuple_GetItem(args, 0);
1143 Py_XINCREF(new);
1144 }
1145 else {
1146 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001147 PyMem_FREE(new_s);
Guido van Rossum101923b1997-04-02 06:11:18 +00001148 }
1149 return new;
1150}
1151
1152
Guido van Rossume270b431992-09-03 20:21:07 +00001153/* List of functions defined in the module */
1154
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001155static PyMethodDef
1156strop_methods[] = {
Guido van Rossum983c9301997-12-29 19:52:29 +00001157 {"atof", strop_atof, 1, atof__doc__},
1158 {"atoi", strop_atoi, 1, atoi__doc__},
1159 {"atol", strop_atol, 1, atol__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001160 {"capitalize", strop_capitalize, 0, capitalize__doc__},
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +00001161 {"count", strop_count, 1, count__doc__},
Guido van Rossum54ec2881999-01-25 22:36:24 +00001162 {"expandtabs", strop_expandtabs, 1, expandtabs__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001163 {"find", strop_find, 1, find__doc__},
1164 {"join", strop_joinfields, 1, joinfields__doc__},
1165 {"joinfields", strop_joinfields, 1, joinfields__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001166 {"lstrip", strop_lstrip, 0, lstrip__doc__},
1167 {"lower", strop_lower, 0, lower__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001168 {"maketrans", strop_maketrans, 1, maketrans__doc__},
1169 {"replace", strop_replace, 1, replace__doc__},
1170 {"rfind", strop_rfind, 1, rfind__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001171 {"rstrip", strop_rstrip, 0, rstrip__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001172 {"split", strop_splitfields, 1, splitfields__doc__},
1173 {"splitfields", strop_splitfields, 1, splitfields__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001174 {"strip", strop_strip, 0, strip__doc__},
1175 {"swapcase", strop_swapcase, 0, swapcase__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001176 {"translate", strop_translate, 1, translate__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001177 {"upper", strop_upper, 0, upper__doc__},
Guido van Rossume270b431992-09-03 20:21:07 +00001178 {NULL, NULL} /* sentinel */
1179};
1180
1181
Guido van Rossum3886bb61998-12-04 18:50:17 +00001182DL_EXPORT(void)
Guido van Rossume270b431992-09-03 20:21:07 +00001183initstrop()
1184{
Barry Warsawf5256011996-12-09 18:35:56 +00001185 PyObject *m, *d, *s;
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001186 char buf[256];
1187 int c, n;
Guido van Rossum983c9301997-12-29 19:52:29 +00001188 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1189 (PyObject*)NULL, PYTHON_API_VERSION);
Barry Warsawf5256011996-12-09 18:35:56 +00001190 d = PyModule_GetDict(m);
Guido van Rossume22e6441993-07-09 10:51:31 +00001191
1192 /* Create 'whitespace' object */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001193 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001194 for (c = 0; c < 256; c++) {
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001195 if (isspace(c))
1196 buf[n++] = c;
1197 }
Barry Warsawf5256011996-12-09 18:35:56 +00001198 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001199 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001200 PyDict_SetItemString(d, "whitespace", s);
1201 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001202 }
1203 /* Create 'lowercase' object */
1204 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001205 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001206 if (islower(c))
1207 buf[n++] = c;
1208 }
Barry Warsawf5256011996-12-09 18:35:56 +00001209 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001210 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001211 PyDict_SetItemString(d, "lowercase", s);
1212 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001213 }
1214
1215 /* Create 'uppercase' object */
1216 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001217 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001218 if (isupper(c))
1219 buf[n++] = c;
1220 }
Barry Warsawf5256011996-12-09 18:35:56 +00001221 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001222 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001223 PyDict_SetItemString(d, "uppercase", s);
1224 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001225 }
1226
Barry Warsawf5256011996-12-09 18:35:56 +00001227 if (PyErr_Occurred())
1228 Py_FatalError("can't initialize module strop");
Guido van Rossume270b431992-09-03 20:21:07 +00001229}