blob: aa9cdc76a052820738526277c4062e9b0377e582 [file] [log] [blame]
Guido van Rossume270b431992-09-03 20:21:07 +00001/* strop module */
2
/* Documentation text for the module as a whole (going by the name; the
   place where it is attached to the module is not visible in this part
   of the file). */
static char strop_module__doc__[] =
    "Common string manipulations, optimized for speed.\n\n"
    "Always use \"import string\" rather than referencing\n"
    "this module directly.";
Guido van Rossum983c9301997-12-29 19:52:29 +00008
Barry Warsawf5256011996-12-09 18:35:56 +00009#include "Python.h"
Guido van Rossume270b431992-09-03 20:21:07 +000010
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000011#include <ctype.h>
Guido van Rossume22e6441993-07-09 10:51:31 +000012/* XXX This file assumes that the <ctype.h> is*() functions
13 XXX are defined for all 8-bit characters! */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000014
/* lstrip(), rstrip() and strip() share one implementation, do_strip().
   These selectors are passed to it to say which end(s) of the string
   should be trimmed. */

enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
22
Guido van Rossume270b431992-09-03 20:21:07 +000023
Barry Warsawf5256011996-12-09 18:35:56 +000024static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000025split_whitespace(char *s, int len, int maxsplit)
Guido van Rossum009e79b1995-05-03 17:40:23 +000026{
Barry Warsawe8fc29c1997-01-03 22:45:34 +000027 int i = 0, j, err;
28 int countsplit = 0;
29 PyObject* item;
30 PyObject *list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +000031
Guido van Rossume270b431992-09-03 20:21:07 +000032 if (list == NULL)
33 return NULL;
34
Guido van Rossume270b431992-09-03 20:21:07 +000035 while (i < len) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +000036 while (i < len && isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000037 i = i+1;
38 }
39 j = i;
Guido van Rossumee1813d1995-02-14 00:58:59 +000040 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000041 i = i+1;
42 }
43 if (j < i) {
Barry Warsawf5256011996-12-09 18:35:56 +000044 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000045 if (item == NULL)
46 goto finally;
47
Barry Warsawf5256011996-12-09 18:35:56 +000048 err = PyList_Append(list, item);
49 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000050 if (err < 0)
51 goto finally;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000052
53 countsplit++;
Barry Warsaw93be92d1997-12-02 00:29:30 +000054 while (i < len && isspace(Py_CHARMASK(s[i]))) {
55 i = i+1;
56 }
57 if (maxsplit && (countsplit >= maxsplit) && i < len) {
Barry Warsawf5256011996-12-09 18:35:56 +000058 item = PyString_FromStringAndSize(
59 s+i, (int)(len - i));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000060 if (item == NULL)
61 goto finally;
62
Barry Warsawf5256011996-12-09 18:35:56 +000063 err = PyList_Append(list, item);
64 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000065 if (err < 0)
66 goto finally;
67
Guido van Rossum7999a5c1996-08-08 19:16:15 +000068 i = len;
69 }
Guido van Rossume270b431992-09-03 20:21:07 +000070 }
71 }
Guido van Rossume270b431992-09-03 20:21:07 +000072 return list;
Barry Warsawe8fc29c1997-01-03 22:45:34 +000073 finally:
74 Py_DECREF(list);
75 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +000076}
77
78
Guido van Rossum983c9301997-12-29 19:52:29 +000079static char splitfields__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +000080"split(s [,sep [,maxsplit]]) -> list of strings\n"
81"splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
82"\n"
83"Return a list of the words in the string s, using sep as the\n"
84"delimiter string. If maxsplit is nonzero, splits into at most\n"
85"maxsplit words. If sep is not specified, any whitespace string\n"
86"is a separator. Maxsplit defaults to 0.\n"
87"\n"
88"(split and splitfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +000089
Barry Warsawf5256011996-12-09 18:35:56 +000090static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +000091strop_splitfields(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +000092{
Guido van Rossum572d2d91993-11-05 10:14:49 +000093 int len, n, i, j, err;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000094 int splitcount, maxsplit;
Guido van Rossume270b431992-09-03 20:21:07 +000095 char *s, *sub;
Barry Warsawf5256011996-12-09 18:35:56 +000096 PyObject *list, *item;
Guido van Rossume270b431992-09-03 20:21:07 +000097
Guido van Rossum009e79b1995-05-03 17:40:23 +000098 sub = NULL;
99 n = 0;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000100 splitcount = 0;
101 maxsplit = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000102 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
Guido van Rossume270b431992-09-03 20:21:07 +0000103 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000104 if (sub == NULL)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000105 return split_whitespace(s, len, maxsplit);
Guido van Rossume270b431992-09-03 20:21:07 +0000106 if (n == 0) {
Barry Warsawf5256011996-12-09 18:35:56 +0000107 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossume270b431992-09-03 20:21:07 +0000108 return NULL;
109 }
110
Barry Warsawf5256011996-12-09 18:35:56 +0000111 list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +0000112 if (list == NULL)
113 return NULL;
114
115 i = j = 0;
116 while (i+n <= len) {
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000117 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000118 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000119 if (item == NULL)
120 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000121 err = PyList_Append(list, item);
122 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000123 if (err < 0)
124 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000125 i = j = i + n;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000126 splitcount++;
127 if (maxsplit && (splitcount >= maxsplit))
128 break;
Guido van Rossume270b431992-09-03 20:21:07 +0000129 }
130 else
131 i++;
132 }
Barry Warsawf5256011996-12-09 18:35:56 +0000133 item = PyString_FromStringAndSize(s+j, (int)(len-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000134 if (item == NULL)
135 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000136 err = PyList_Append(list, item);
137 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000138 if (err < 0)
139 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000140
141 return list;
Guido van Rossum572d2d91993-11-05 10:14:49 +0000142
143 fail:
Barry Warsawf5256011996-12-09 18:35:56 +0000144 Py_DECREF(list);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000145 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +0000146}
147
148
Guido van Rossum983c9301997-12-29 19:52:29 +0000149static char joinfields__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000150"join(list [,sep]) -> string\n"
151"joinfields(list [,sep]) -> string\n"
152"\n"
153"Return a string composed of the words in list, with\n"
154"intervening occurrences of sep. Sep defaults to a single\n"
155"space.\n"
156"\n"
157"(join and joinfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +0000158
Barry Warsawf5256011996-12-09 18:35:56 +0000159static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000160strop_joinfields(PyObject *self, PyObject *args)
Guido van Rossumc89705d1992-11-26 08:54:07 +0000161{
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000162 PyObject *seq;
163 char *sep = NULL;
164 int seqlen, seplen = 0;
165 int i, reslen = 0, slen = 0, sz = 100;
166 PyObject *res = NULL;
167 char* p = NULL;
168 intargfunc getitemfunc;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000169
Guido van Rossum43713e52000-02-29 13:59:29 +0000170 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
Guido van Rossumc89705d1992-11-26 08:54:07 +0000171 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000172 if (sep == NULL) {
173 sep = " ";
174 seplen = 1;
175 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000176
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000177 seqlen = PySequence_Size(seq);
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000178 if (seqlen < 0 && PyErr_Occurred())
179 return NULL;
180
181 if (seqlen == 1) {
182 /* Optimization if there's only one item */
183 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000184 if (item && !PyString_Check(item)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000185 PyErr_SetString(PyExc_TypeError,
186 "first argument must be sequence of strings");
Guido van Rossumbf338301998-10-19 13:38:36 +0000187 Py_DECREF(item);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000188 return NULL;
189 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000190 return item;
191 }
192
193 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
194 return NULL;
195 p = PyString_AsString(res);
196
197 /* optimize for lists, since it's the most common case. all others
198 * (tuples and arbitrary sequences) just use the sequence abstract
199 * interface.
Barry Warsaw04d2d151997-01-03 23:46:51 +0000200 */
201 if (PyList_Check(seq)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000202 for (i = 0; i < seqlen; i++) {
203 PyObject *item = PyList_GET_ITEM(seq, i);
204 if (!PyString_Check(item)) {
205 PyErr_SetString(PyExc_TypeError,
206 "first argument must be sequence of strings");
207 Py_DECREF(res);
208 return NULL;
209 }
210 slen = PyString_GET_SIZE(item);
211 while (reslen + slen + seplen >= sz) {
212 if (_PyString_Resize(&res, sz * 2)) {
213 Py_DECREF(res);
214 return NULL;
215 }
216 sz *= 2;
217 p = PyString_AsString(res) + reslen;
218 }
219 if (i > 0) {
220 memcpy(p, sep, seplen);
221 p += seplen;
222 reslen += seplen;
223 }
224 memcpy(p, PyString_AS_STRING(item), slen);
225 p += slen;
226 reslen += slen;
227 }
228 if (_PyString_Resize(&res, reslen)) {
229 Py_DECREF(res);
230 res = NULL;
231 }
232 return res;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000233 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000234
235 if (seq->ob_type->tp_as_sequence == NULL ||
236 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
237 {
Barry Warsawf5256011996-12-09 18:35:56 +0000238 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000239 "first argument must be a sequence");
Guido van Rossumc89705d1992-11-26 08:54:07 +0000240 return NULL;
241 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000242 /* This is now type safe */
Guido van Rossumc89705d1992-11-26 08:54:07 +0000243 for (i = 0; i < seqlen; i++) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000244 PyObject *item = getitemfunc(seq, i);
245 if (!item || !PyString_Check(item)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000246 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000247 "first argument must be sequence of strings");
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000248 Py_DECREF(res);
249 Py_XDECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000250 return NULL;
251 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000252 slen = PyString_GET_SIZE(item);
253 while (reslen + slen + seplen >= sz) {
254 if (_PyString_Resize(&res, sz * 2)) {
255 Py_DECREF(res);
256 Py_DECREF(item);
257 return NULL;
258 }
259 sz *= 2;
260 p = PyString_AsString(res) + reslen;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000261 }
Guido van Rossumc89705d1992-11-26 08:54:07 +0000262 if (i > 0) {
263 memcpy(p, sep, seplen);
264 p += seplen;
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000265 reslen += seplen;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000266 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000267 memcpy(p, PyString_AS_STRING(item), slen);
268 p += slen;
269 reslen += slen;
270 Py_DECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000271 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000272 if (_PyString_Resize(&res, reslen)) {
273 Py_DECREF(res);
274 res = NULL;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000275 }
276 return res;
277}
278
Guido van Rossum983c9301997-12-29 19:52:29 +0000279
/* Documentation text for find(); the signature line gives the return
   type as "int", matching the rfind() and count() docstrings. */
static char find__doc__[] =
"find(s, sub [,start [,end]]) -> int\n"
"\n"
"Return the lowest index in s where substring sub is found,\n"
"such that sub is contained within s[start,end]. Optional\n"
"arguments start and end are interpreted as in slice notation.\n"
"\n"
"Return -1 on failure.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000288
Barry Warsawf5256011996-12-09 18:35:56 +0000289static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000290strop_find(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000291{
292 char *s, *sub;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000293 int len, n, i = 0, last = INT_MAX;
Guido van Rossume270b431992-09-03 20:21:07 +0000294
Guido van Rossum43713e52000-02-29 13:59:29 +0000295 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000296 return NULL;
297
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000298 if (last > len)
299 last = len;
300 if (last < 0)
301 last += len;
302 if (last < 0)
303 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000304 if (i < 0)
305 i += len;
306 if (i < 0)
Guido van Rossume270b431992-09-03 20:21:07 +0000307 i = 0;
Guido van Rossume270b431992-09-03 20:21:07 +0000308
Guido van Rossum031c6311998-03-24 04:19:22 +0000309 if (n == 0 && i <= last)
Barry Warsawf5256011996-12-09 18:35:56 +0000310 return PyInt_FromLong((long)i);
Guido van Rossume270b431992-09-03 20:21:07 +0000311
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000312 last -= n;
313 for (; i <= last; ++i)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000314 if (s[i] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000315 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000316 return PyInt_FromLong((long)i);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000317
Barry Warsawf5256011996-12-09 18:35:56 +0000318 return PyInt_FromLong(-1L);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000319}
320
321
Guido van Rossum983c9301997-12-29 19:52:29 +0000322static char rfind__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000323"rfind(s, sub [,start [,end]]) -> int\n"
324"\n"
325"Return the highest index in s where substring sub is found,\n"
326"such that sub is contained within s[start,end]. Optional\n"
327"arguments start and end are interpreted as in slice notation.\n"
328"\n"
329"Return -1 on failure.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000330
Barry Warsawf5256011996-12-09 18:35:56 +0000331static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000332strop_rfind(PyObject *self, PyObject *args)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000333{
334 char *s, *sub;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000335 int len, n, j;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000336 int i = 0, last = INT_MAX;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000337
Guido van Rossum43713e52000-02-29 13:59:29 +0000338 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000339 return NULL;
340
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000341 if (last > len)
342 last = len;
343 if (last < 0)
344 last += len;
345 if (last < 0)
346 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000347 if (i < 0)
348 i += len;
349 if (i < 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000350 i = 0;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000351
Guido van Rossum031c6311998-03-24 04:19:22 +0000352 if (n == 0 && i <= last)
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000353 return PyInt_FromLong((long)last);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000354
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000355 for (j = last-n; j >= i; --j)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000356 if (s[j] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000357 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000358 return PyInt_FromLong((long)j);
Guido van Rossume270b431992-09-03 20:21:07 +0000359
Barry Warsawf5256011996-12-09 18:35:56 +0000360 return PyInt_FromLong(-1L);
Guido van Rossume270b431992-09-03 20:21:07 +0000361}
362
Guido van Rossum983c9301997-12-29 19:52:29 +0000363
Barry Warsawf5256011996-12-09 18:35:56 +0000364static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000365do_strip(PyObject *args, int striptype)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000366{
367 char *s;
368 int len, i, j;
369
370
Guido van Rossum7e488981998-10-08 02:25:24 +0000371 if (!PyArg_Parse(args, "t#", &s, &len))
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000372 return NULL;
373
374 i = 0;
375 if (striptype != RIGHTSTRIP) {
376 while (i < len && isspace(Py_CHARMASK(s[i]))) {
377 i++;
378 }
379 }
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000380
381 j = len;
382 if (striptype != LEFTSTRIP) {
383 do {
384 j--;
385 } while (j >= i && isspace(Py_CHARMASK(s[j])));
386 j++;
387 }
388
389 if (i == 0 && j == len) {
Barry Warsawf5256011996-12-09 18:35:56 +0000390 Py_INCREF(args);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000391 return args;
392 }
393 else
Barry Warsawf5256011996-12-09 18:35:56 +0000394 return PyString_FromStringAndSize(s+i, j-i);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000395}
396
Guido van Rossume270b431992-09-03 20:21:07 +0000397
Guido van Rossum983c9301997-12-29 19:52:29 +0000398static char strip__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000399"strip(s) -> string\n"
400"\n"
401"Return a copy of the string s with leading and trailing\n"
402"whitespace removed.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000403
Barry Warsawf5256011996-12-09 18:35:56 +0000404static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000405strop_strip(PyObject *self, PyObject *args)
Guido van Rossume270b431992-09-03 20:21:07 +0000406{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000407 return do_strip(args, BOTHSTRIP);
408}
Guido van Rossume270b431992-09-03 20:21:07 +0000409
Guido van Rossum983c9301997-12-29 19:52:29 +0000410
411static char lstrip__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000412"lstrip(s) -> string\n"
413"\n"
414"Return a copy of the string s with leading whitespace removed.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000415
Barry Warsawf5256011996-12-09 18:35:56 +0000416static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000417strop_lstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000418{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000419 return do_strip(args, LEFTSTRIP);
420}
Guido van Rossume270b431992-09-03 20:21:07 +0000421
Guido van Rossum983c9301997-12-29 19:52:29 +0000422
423static char rstrip__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000424"rstrip(s) -> string\n"
425"\n"
426"Return a copy of the string s with trailing whitespace removed.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000427
Barry Warsawf5256011996-12-09 18:35:56 +0000428static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000429strop_rstrip(PyObject *self, PyObject *args)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000430{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000431 return do_strip(args, RIGHTSTRIP);
Guido van Rossume270b431992-09-03 20:21:07 +0000432}
433
434
Guido van Rossum983c9301997-12-29 19:52:29 +0000435static char lower__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000436"lower(s) -> string\n"
437"\n"
438"Return a copy of the string s converted to lowercase.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000439
Barry Warsawf5256011996-12-09 18:35:56 +0000440static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000441strop_lower(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000442{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000443 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000444 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000445 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000446 int changed;
447
Guido van Rossum7e488981998-10-08 02:25:24 +0000448 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000449 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000450 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000451 if (new == NULL)
452 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000453 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000454 changed = 0;
455 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000456 int c = Py_CHARMASK(*s++);
Barry Warsaw04d2d151997-01-03 23:46:51 +0000457 if (isupper(c)) {
Guido van Rossum5c850621992-09-11 23:55:51 +0000458 changed = 1;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000459 *s_new = tolower(c);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000460 } else
461 *s_new = c;
462 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000463 }
464 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000465 Py_DECREF(new);
466 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000467 return args;
468 }
469 return new;
470}
471
472
Guido van Rossum983c9301997-12-29 19:52:29 +0000473static char upper__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000474"upper(s) -> string\n"
475"\n"
476"Return a copy of the string s converted to uppercase.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000477
Barry Warsawf5256011996-12-09 18:35:56 +0000478static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000479strop_upper(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000480{
Barry Warsaw04d2d151997-01-03 23:46:51 +0000481 char *s, *s_new;
482 int i, n;
483 PyObject *new;
484 int changed;
485
Guido van Rossum7e488981998-10-08 02:25:24 +0000486 if (!PyArg_Parse(args, "t#", &s, &n))
Barry Warsaw04d2d151997-01-03 23:46:51 +0000487 return NULL;
488 new = PyString_FromStringAndSize(NULL, n);
489 if (new == NULL)
490 return NULL;
491 s_new = PyString_AsString(new);
492 changed = 0;
493 for (i = 0; i < n; i++) {
494 int c = Py_CHARMASK(*s++);
495 if (islower(c)) {
496 changed = 1;
497 *s_new = toupper(c);
498 } else
499 *s_new = c;
500 s_new++;
501 }
502 if (!changed) {
503 Py_DECREF(new);
504 Py_INCREF(args);
505 return args;
506 }
507 return new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000508}
509
510
Guido van Rossum983c9301997-12-29 19:52:29 +0000511static char capitalize__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000512"capitalize(s) -> string\n"
513"\n"
514"Return a copy of the string s with only its first character\n"
515"capitalized.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000516
Barry Warsawf5256011996-12-09 18:35:56 +0000517static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000518strop_capitalize(PyObject *self, PyObject *args)
Guido van Rossum27457531996-06-12 04:24:52 +0000519{
520 char *s, *s_new;
521 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000522 PyObject *new;
Guido van Rossum27457531996-06-12 04:24:52 +0000523 int changed;
524
Guido van Rossum7e488981998-10-08 02:25:24 +0000525 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum27457531996-06-12 04:24:52 +0000526 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000527 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum27457531996-06-12 04:24:52 +0000528 if (new == NULL)
529 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000530 s_new = PyString_AsString(new);
Guido van Rossum27457531996-06-12 04:24:52 +0000531 changed = 0;
Guido van Rossum529c9631996-06-17 16:59:33 +0000532 if (0 < n) {
Guido van Rossum27457531996-06-12 04:24:52 +0000533 int c = Py_CHARMASK(*s++);
534 if (islower(c)) {
535 changed = 1;
536 *s_new = toupper(c);
537 } else
538 *s_new = c;
539 s_new++;
540 }
541 for (i = 1; i < n; i++) {
542 int c = Py_CHARMASK(*s++);
543 if (isupper(c)) {
544 changed = 1;
545 *s_new = tolower(c);
546 } else
547 *s_new = c;
548 s_new++;
549 }
550 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000551 Py_DECREF(new);
552 Py_INCREF(args);
Guido van Rossum27457531996-06-12 04:24:52 +0000553 return args;
554 }
555 return new;
556}
557
558
Guido van Rossum54ec2881999-01-25 22:36:24 +0000559static char expandtabs__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000560"expandtabs(string, [tabsize]) -> string\n"
561"\n"
562"Expand tabs in a string, i.e. replace them by one or more spaces,\n"
563"depending on the current column and the given tab size (default 8).\n"
564"The column number is reset to zero after each newline occurring in the\n"
565"string. This doesn't understand other non-printing characters.";
Guido van Rossum54ec2881999-01-25 22:36:24 +0000566
567static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000568strop_expandtabs(PyObject *self, PyObject *args)
Guido van Rossum54ec2881999-01-25 22:36:24 +0000569{
570 /* Original by Fredrik Lundh */
571 char* e;
572 char* p;
573 char* q;
574 int i, j;
575 PyObject* out;
576 char* string;
577 int stringlen;
578 int tabsize = 8;
579
580 /* Get arguments */
Guido van Rossum43713e52000-02-29 13:59:29 +0000581 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
Guido van Rossum54ec2881999-01-25 22:36:24 +0000582 return NULL;
583 if (tabsize < 1) {
584 PyErr_SetString(PyExc_ValueError,
585 "tabsize must be at least 1");
586 return NULL;
587 }
588
589 /* First pass: determine size of output string */
590 i = j = 0; /* j: current column; i: total of previous lines */
591 e = string + stringlen;
592 for (p = string; p < e; p++) {
593 if (*p == '\t')
594 j += tabsize - (j%tabsize);
595 else {
596 j++;
597 if (*p == '\n') {
598 i += j;
599 j = 0;
600 }
601 }
602 }
603
604 /* Second pass: create output string and fill it */
605 out = PyString_FromStringAndSize(NULL, i+j);
606 if (out == NULL)
607 return NULL;
608
609 i = 0;
610 q = PyString_AS_STRING(out);
611
612 for (p = string; p < e; p++) {
613 if (*p == '\t') {
614 j = tabsize - (i%tabsize);
615 i += j;
616 while (j-- > 0)
617 *q++ = ' ';
618 } else {
619 *q++ = *p;
620 i++;
621 if (*p == '\n')
622 i = 0;
623 }
624 }
625
626 return out;
627}
628
629
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000630static char count__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000631"count(s, sub[, start[, end]]) -> int\n"
632"\n"
633"Return the number of occurrences of substring sub in string\n"
634"s[start:end]. Optional arguments start and end are\n"
635"interpreted as in slice notation.";
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000636
637static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000638strop_count(PyObject *self, PyObject *args)
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000639{
640 char *s, *sub;
Guido van Rossumc5015831998-10-07 16:36:14 +0000641 int len, n;
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000642 int i = 0, last = INT_MAX;
643 int m, r;
644
Guido van Rossum43713e52000-02-29 13:59:29 +0000645 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000646 return NULL;
647 if (last > len)
648 last = len;
649 if (last < 0)
650 last += len;
651 if (last < 0)
652 last = 0;
653 if (i < 0)
654 i += len;
655 if (i < 0)
656 i = 0;
657 m = last + 1 - n;
658 if (n == 0)
659 return PyInt_FromLong((long) (m-i));
660
661 r = 0;
662 while (i < m) {
663 if (!memcmp(s+i, sub, n)) {
664 r++;
665 i += n;
666 } else {
667 i++;
668 }
669 }
670 return PyInt_FromLong((long) r);
671}
672
673
Guido van Rossum983c9301997-12-29 19:52:29 +0000674static char swapcase__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000675"swapcase(s) -> string\n"
676"\n"
677"Return a copy of the string s with upper case characters\n"
678"converted to lowercase and vice versa.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000679
Barry Warsawf5256011996-12-09 18:35:56 +0000680static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000681strop_swapcase(PyObject *self, PyObject *args)
Guido van Rossum5c850621992-09-11 23:55:51 +0000682{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000683 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000684 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000685 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000686 int changed;
687
Guido van Rossum7e488981998-10-08 02:25:24 +0000688 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000689 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000690 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000691 if (new == NULL)
692 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000693 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000694 changed = 0;
695 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000696 int c = Py_CHARMASK(*s++);
Guido van Rossum5c850621992-09-11 23:55:51 +0000697 if (islower(c)) {
698 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000699 *s_new = toupper(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000700 }
701 else if (isupper(c)) {
702 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000703 *s_new = tolower(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000704 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000705 else
706 *s_new = c;
707 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000708 }
709 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000710 Py_DECREF(new);
711 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000712 return args;
713 }
714 return new;
715}
716
717
Guido van Rossum983c9301997-12-29 19:52:29 +0000718static char atoi__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000719"atoi(s [,base]) -> int\n"
720"\n"
721"Return the integer represented by the string s in the given\n"
722"base, which defaults to 10. The string s must consist of one\n"
723"or more digits, possibly preceded by a sign. If base is 0, it\n"
724"is chosen from the leading characters of s, 0 for octal, 0x or\n"
725"0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
726"accepted.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000727
Barry Warsawf5256011996-12-09 18:35:56 +0000728static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000729strop_atoi(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000730{
Guido van Rossumb6775db1994-08-01 11:34:53 +0000731 char *s, *end;
732 int base = 10;
733 long x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000734 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000735
Guido van Rossum43713e52000-02-29 13:59:29 +0000736 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000737 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000738
739 if ((base != 0 && base < 2) || base > 36) {
740 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
741 return NULL;
742 }
743
Guido van Rossumc35f9331996-09-11 23:30:42 +0000744 while (*s && isspace(Py_CHARMASK(*s)))
745 s++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000746 errno = 0;
747 if (base == 0 && s[0] == '0')
Barry Warsawf5256011996-12-09 18:35:56 +0000748 x = (long) PyOS_strtoul(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000749 else
Barry Warsawf5256011996-12-09 18:35:56 +0000750 x = PyOS_strtol(s, &end, base);
Guido van Rossum5bd69db1999-02-22 16:18:44 +0000751 if (end == s || !isalnum(end[-1]))
Guido van Rossum923fece51998-08-04 15:04:52 +0000752 goto bad;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000753 while (*end && isspace(Py_CHARMASK(*end)))
754 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000755 if (*end != '\0') {
Guido van Rossum923fece51998-08-04 15:04:52 +0000756 bad:
Guido van Rossumc35f9331996-09-11 23:30:42 +0000757 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000758 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000759 return NULL;
760 }
761 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000762 sprintf(buffer, "atoi() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000763 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000764 return NULL;
765 }
Barry Warsawf5256011996-12-09 18:35:56 +0000766 return PyInt_FromLong(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000767}
768
769
Guido van Rossum983c9301997-12-29 19:52:29 +0000770static char atol__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000771"atol(s [,base]) -> long\n"
772"\n"
773"Return the long integer represented by the string s in the\n"
774"given base, which defaults to 10. The string s must consist\n"
775"of one or more digits, possibly preceded by a sign. If base\n"
776"is 0, it is chosen from the leading characters of s, 0 for\n"
777"octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n"
778"0x or 0X is accepted. A trailing L or l is not accepted,\n"
779"unless base is 0.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000780
Barry Warsawf5256011996-12-09 18:35:56 +0000781static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000782strop_atol(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000783{
784 char *s, *end;
785 int base = 10;
Barry Warsawf5256011996-12-09 18:35:56 +0000786 PyObject *x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000787 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000788
Guido van Rossum43713e52000-02-29 13:59:29 +0000789 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000790 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000791
792 if ((base != 0 && base < 2) || base > 36) {
793 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
794 return NULL;
795 }
796
Guido van Rossumc35f9331996-09-11 23:30:42 +0000797 while (*s && isspace(Py_CHARMASK(*s)))
798 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000799 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000800 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000801 return NULL;
802 }
Barry Warsawf5256011996-12-09 18:35:56 +0000803 x = PyLong_FromString(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000804 if (x == NULL)
805 return NULL;
806 if (base == 0 && (*end == 'l' || *end == 'L'))
807 end++;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000808 while (*end && isspace(Py_CHARMASK(*end)))
809 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000810 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000811 sprintf(buffer, "invalid literal for atol(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000812 PyErr_SetString(PyExc_ValueError, buffer);
813 Py_DECREF(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000814 return NULL;
815 }
816 return x;
817}
818
819
Guido van Rossum983c9301997-12-29 19:52:29 +0000820static char atof__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000821"atof(s) -> float\n"
822"\n"
823"Return the floating point number represented by the string s.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000824
Barry Warsawf5256011996-12-09 18:35:56 +0000825static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000826strop_atof(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000827{
Tim Petersdbd9ba62000-07-09 03:09:57 +0000828 extern double strtod(const char *, char **);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000829 char *s, *end;
830 double x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000831 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000832
Guido van Rossum43713e52000-02-29 13:59:29 +0000833 if (!PyArg_ParseTuple(args, "s:atof", &s))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000834 return NULL;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000835 while (*s && isspace(Py_CHARMASK(*s)))
836 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000837 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000838 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000839 return NULL;
840 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000841 errno = 0;
Guido van Rossum52fa3a61997-02-14 22:59:58 +0000842 PyFPE_START_PROTECT("strop_atof", return 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000843 x = strtod(s, &end);
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000844 PyFPE_END_PROTECT(x)
Guido van Rossumc35f9331996-09-11 23:30:42 +0000845 while (*end && isspace(Py_CHARMASK(*end)))
846 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000847 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000848 sprintf(buffer, "invalid literal for atof(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000849 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000850 return NULL;
851 }
852 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000853 sprintf(buffer, "atof() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000854 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000855 return NULL;
856 }
Barry Warsawf5256011996-12-09 18:35:56 +0000857 return PyFloat_FromDouble(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000858}
859
860
Guido van Rossum983c9301997-12-29 19:52:29 +0000861static char maketrans__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000862"maketrans(frm, to) -> string\n"
863"\n"
864"Return a translation table (a string of 256 bytes long)\n"
865"suitable for use in string.translate. The strings frm and to\n"
866"must be of the same length.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000867
Guido van Rossumed7253c1996-07-23 18:12:39 +0000868static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000869strop_maketrans(PyObject *self, PyObject *args)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000870{
Guido van Rossume0548b81997-01-06 16:50:09 +0000871 unsigned char *c, *from=NULL, *to=NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000872 int i, fromlen=0, tolen=0;
Guido van Rossume0548b81997-01-06 16:50:09 +0000873 PyObject *result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000874
Guido van Rossum43713e52000-02-29 13:59:29 +0000875 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000876 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000877
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000878 if (fromlen != tolen) {
Barry Warsawf5256011996-12-09 18:35:56 +0000879 PyErr_SetString(PyExc_ValueError,
Guido van Rossumed7253c1996-07-23 18:12:39 +0000880 "maketrans arguments must have same length");
881 return NULL;
882 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000883
884 result = PyString_FromStringAndSize((char *)NULL, 256);
885 if (result == NULL)
886 return NULL;
887 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000888 for (i = 0; i < 256; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000889 c[i]=(unsigned char)i;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000890 for (i = 0; i < fromlen; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000891 c[from[i]]=to[i];
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000892
Guido van Rossume0548b81997-01-06 16:50:09 +0000893 return result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000894}
895
896
Guido van Rossum983c9301997-12-29 19:52:29 +0000897static char translate__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +0000898"translate(s,table [,deletechars]) -> string\n"
899"\n"
900"Return a copy of the string s, where all characters occurring\n"
901"in the optional argument deletechars are removed, and the\n"
902"remaining characters have been mapped through the given\n"
903"translation table, which must be a string of length 256.";
Guido van Rossum983c9301997-12-29 19:52:29 +0000904
Barry Warsawf5256011996-12-09 18:35:56 +0000905static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +0000906strop_translate(PyObject *self, PyObject *args)
Guido van Rossuma3127e81995-09-13 17:39:06 +0000907{
Guido van Rossume0548b81997-01-06 16:50:09 +0000908 register char *input, *table, *output;
909 register int i, c, changed = 0;
910 PyObject *input_obj;
911 char *table1, *output_start, *del_table=NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000912 int inlen, tablen, dellen = 0;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000913 PyObject *result;
Guido van Rossume0548b81997-01-06 16:50:09 +0000914 int trans_table[256];
Guido van Rossuma3127e81995-09-13 17:39:06 +0000915
Guido van Rossum43713e52000-02-29 13:59:29 +0000916 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
Guido van Rossume0548b81997-01-06 16:50:09 +0000917 &table1, &tablen, &del_table, &dellen))
Guido van Rossuma3127e81995-09-13 17:39:06 +0000918 return NULL;
919 if (tablen != 256) {
Barry Warsawf5256011996-12-09 18:35:56 +0000920 PyErr_SetString(PyExc_ValueError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000921 "translation table must be 256 characters long");
Guido van Rossuma3127e81995-09-13 17:39:06 +0000922 return NULL;
923 }
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000924
Guido van Rossume0548b81997-01-06 16:50:09 +0000925 table = table1;
926 inlen = PyString_Size(input_obj);
Guido van Rossumed7253c1996-07-23 18:12:39 +0000927 result = PyString_FromStringAndSize((char *)NULL, inlen);
Guido van Rossuma3127e81995-09-13 17:39:06 +0000928 if (result == NULL)
929 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000930 output_start = output = PyString_AsString(result);
Guido van Rossume0548b81997-01-06 16:50:09 +0000931 input = PyString_AsString(input_obj);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000932
Guido van Rossume0548b81997-01-06 16:50:09 +0000933 if (dellen == 0) {
934 /* If no deletions are required, use faster code */
935 for (i = inlen; --i >= 0; ) {
936 c = Py_CHARMASK(*input++);
937 if (Py_CHARMASK((*output++ = table[c])) != c)
938 changed = 1;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000939 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000940 if (changed)
941 return result;
942 Py_DECREF(result);
943 Py_INCREF(input_obj);
944 return input_obj;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000945 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000946
947 for (i = 0; i < 256; i++)
948 trans_table[i] = Py_CHARMASK(table[i]);
949
Guido van Rossum983c9301997-12-29 19:52:29 +0000950 for (i = 0; i < dellen; i++)
Guido van Rossum1ed5e571997-04-29 21:34:16 +0000951 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Guido van Rossume0548b81997-01-06 16:50:09 +0000952
953 for (i = inlen; --i >= 0; ) {
954 c = Py_CHARMASK(*input++);
955 if (trans_table[c] != -1)
956 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
957 continue;
958 changed = 1;
959 }
960 if (!changed) {
961 Py_DECREF(result);
962 Py_INCREF(input_obj);
963 return input_obj;
964 }
965 /* Fix the size of the resulting string */
966 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
Guido van Rossum983c9301997-12-29 19:52:29 +0000967 return NULL;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000968 return result;
969}
970
971
Guido van Rossum101923b1997-04-02 06:11:18 +0000972/* What follows is used for implementing replace(). Perry Stoll. */
973
/*
   mymemfind

   strstr-like search over arbitrary blocks of memory.

   Looks for the first place in MEM (LEN bytes) where the PAT_LEN
   bytes at PAT occur and returns that index, or -1 when there is no
   such place.  A pattern longer than MEM is never found.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Last index at which a full pattern still fits. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before comparing the rest. */
        if (mem[pos] != pat[0])
            continue;
        if (pat_len == 1)
            return pos;
        if (memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
            return pos;
    }
    return -1;
}
1001
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   the front and resuming just past each match.
   So mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also
   gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int step;

    for (; len >= 0; hits++) {
        step = mymemfind(mem, len, pat, pat_len);
        if (step == -1)
            break;
        /* Skip the unmatched prefix plus the match itself. */
        step += pat_len;
        mem += step;
        len -= step;
    }
    return hits;
}
1025
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means no limit.

   If STR is empty, length of PAT is greater than length of STR, or
   there are no occurrences of PAT to replace, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller is responsible for freeing it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the output length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,     /* input string */
             const char *pat, int pat_len, /* pattern string to find */
             const char *sub, int sub_len, /* substitution string */
             int count,                    /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only a positive delta can push the
       total past INT_MAX; refuse rather than let the multiplication
       wrap and under-allocate the output buffer. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;

    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1110
1111
Guido van Rossum983c9301997-12-29 19:52:29 +00001112static char replace__doc__[] =
Tim Peters0f8b4942001-05-09 22:15:03 +00001113"replace (str, old, new[, maxsplit]) -> string\n"
1114"\n"
1115"Return a copy of string str with all occurrences of substring\n"
1116"old replaced by new. If the optional argument maxsplit is\n"
1117"given, only the first maxsplit occurrences are replaced.";
Guido van Rossum983c9301997-12-29 19:52:29 +00001118
1119static PyObject *
Peter Schneider-Kamp8235f1c2000-07-10 09:43:24 +00001120strop_replace(PyObject *self, PyObject *args)
Guido van Rossum101923b1997-04-02 06:11:18 +00001121{
1122 char *str, *pat,*sub,*new_s;
1123 int len,pat_len,sub_len,out_len;
Tim Petersda45d552001-05-10 00:59:45 +00001124 int count = -1;
Guido van Rossum101923b1997-04-02 06:11:18 +00001125 PyObject *new;
1126
Guido van Rossum43713e52000-02-29 13:59:29 +00001127 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
Barry Warsawf577c081997-11-29 00:10:07 +00001128 &str, &len, &pat, &pat_len, &sub, &sub_len,
1129 &count))
Guido van Rossum101923b1997-04-02 06:11:18 +00001130 return NULL;
Guido van Rossum4ccda151998-05-14 02:36:29 +00001131 if (pat_len <= 0) {
1132 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1133 return NULL;
1134 }
Barry Warsawf577c081997-11-29 00:10:07 +00001135 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
Guido van Rossum101923b1997-04-02 06:11:18 +00001136 if (new_s == NULL) {
1137 PyErr_NoMemory();
1138 return NULL;
1139 }
1140 if (out_len == -1) {
1141 /* we're returning another reference to the input string */
1142 new = PyTuple_GetItem(args, 0);
1143 Py_XINCREF(new);
1144 }
1145 else {
1146 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001147 PyMem_FREE(new_s);
Guido van Rossum101923b1997-04-02 06:11:18 +00001148 }
1149 return new;
1150}
1151
1152
Guido van Rossume270b431992-09-03 20:21:07 +00001153/* List of functions defined in the module */
1154
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001155static PyMethodDef
1156strop_methods[] = {
Tim Peters0f8b4942001-05-09 22:15:03 +00001157 {"atof", strop_atof, METH_VARARGS, atof__doc__},
1158 {"atoi", strop_atoi, METH_VARARGS, atoi__doc__},
1159 {"atol", strop_atol, METH_VARARGS, atol__doc__},
1160 {"capitalize", strop_capitalize, METH_OLDARGS, capitalize__doc__},
1161 {"count", strop_count, METH_VARARGS, count__doc__},
1162 {"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__},
1163 {"find", strop_find, METH_VARARGS, find__doc__},
1164 {"join", strop_joinfields, METH_VARARGS, joinfields__doc__},
1165 {"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__},
1166 {"lstrip", strop_lstrip, METH_OLDARGS, lstrip__doc__},
1167 {"lower", strop_lower, METH_OLDARGS, lower__doc__},
1168 {"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__},
1169 {"replace", strop_replace, METH_VARARGS, replace__doc__},
1170 {"rfind", strop_rfind, METH_VARARGS, rfind__doc__},
1171 {"rstrip", strop_rstrip, METH_OLDARGS, rstrip__doc__},
1172 {"split", strop_splitfields, METH_VARARGS, splitfields__doc__},
1173 {"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
1174 {"strip", strop_strip, METH_OLDARGS, strip__doc__},
1175 {"swapcase", strop_swapcase, METH_OLDARGS, swapcase__doc__},
1176 {"translate", strop_translate, METH_VARARGS, translate__doc__},
1177 {"upper", strop_upper, METH_OLDARGS, upper__doc__},
Guido van Rossume270b431992-09-03 20:21:07 +00001178 {NULL, NULL} /* sentinel */
1179};
1180
1181
Guido van Rossum3886bb61998-12-04 18:50:17 +00001182DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00001183initstrop(void)
Guido van Rossume270b431992-09-03 20:21:07 +00001184{
Barry Warsawf5256011996-12-09 18:35:56 +00001185 PyObject *m, *d, *s;
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001186 char buf[256];
1187 int c, n;
Guido van Rossum983c9301997-12-29 19:52:29 +00001188 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1189 (PyObject*)NULL, PYTHON_API_VERSION);
Barry Warsawf5256011996-12-09 18:35:56 +00001190 d = PyModule_GetDict(m);
Guido van Rossume22e6441993-07-09 10:51:31 +00001191
1192 /* Create 'whitespace' object */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001193 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001194 for (c = 0; c < 256; c++) {
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001195 if (isspace(c))
1196 buf[n++] = c;
1197 }
Barry Warsawf5256011996-12-09 18:35:56 +00001198 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001199 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001200 PyDict_SetItemString(d, "whitespace", s);
1201 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001202 }
1203 /* Create 'lowercase' object */
1204 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001205 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001206 if (islower(c))
1207 buf[n++] = c;
1208 }
Barry Warsawf5256011996-12-09 18:35:56 +00001209 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001210 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001211 PyDict_SetItemString(d, "lowercase", s);
1212 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001213 }
1214
1215 /* Create 'uppercase' object */
1216 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001217 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001218 if (isupper(c))
1219 buf[n++] = c;
1220 }
Barry Warsawf5256011996-12-09 18:35:56 +00001221 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001222 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001223 PyDict_SetItemString(d, "uppercase", s);
1224 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001225 }
Guido van Rossume270b431992-09-03 20:21:07 +00001226}