blob: 7a56e3751feae0659bbd879bfbeb1361e37a6b25 [file] [log] [blame]
Guido van Rossume270b431992-09-03 20:21:07 +00001/***********************************************************
Guido van Rossum524b5881995-01-04 19:10:35 +00002Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
Guido van Rossume270b431992-09-03 20:21:07 +00004
5 All Rights Reserved
6
Guido van Rossumd266eb41996-10-25 14:44:06 +00007Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
Guido van Rossume270b431992-09-03 20:21:07 +00009provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000010both that copyright notice and this permission notice appear in
Guido van Rossume270b431992-09-03 20:21:07 +000011supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
Guido van Rossume270b431992-09-03 20:21:07 +000016
Guido van Rossumd266eb41996-10-25 14:44:06 +000017While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
Guido van Rossume270b431992-09-03 20:21:07 +000029
30******************************************************************/
31
32/* strop module */
33
/* Documentation text for the strop module as a whole.  How it is attached
   to the module object is not visible in this part of the file. */
static char strop_module__doc__[] =
"Common string manipulations, optimized for speed.\n\
\n\
Always use \"import string\" rather than referencing\n\
this module directly.";
Guido van Rossum983c9301997-12-29 19:52:29 +000039
Barry Warsawf5256011996-12-09 18:35:56 +000040#include "Python.h"
Guido van Rossume270b431992-09-03 20:21:07 +000041
Guido van Rossum7b7c5781997-03-14 04:13:56 +000042#ifdef HAVE_LIMITS_H
43#include <limits.h>
44#else
45#define INT_MAX 2147483647
46#endif
47
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000048#include <ctype.h>
Guido van Rossume22e6441993-07-09 10:51:31 +000049/* XXX This file assumes that the <ctype.h> is*() functions
50 XXX are defined for all 8-bit characters! */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +000051
Guido van Rossum7999a5c1996-08-08 19:16:15 +000052/* The lstrip(), rstrip() and strip() functions are implemented
53 in do_strip(), which uses an additional parameter to indicate what
54 type of strip should occur. */
55
56#define LEFTSTRIP 0
57#define RIGHTSTRIP 1
58#define BOTHSTRIP 2
59
Guido van Rossume270b431992-09-03 20:21:07 +000060
Barry Warsawf5256011996-12-09 18:35:56 +000061static PyObject *
Guido van Rossum7999a5c1996-08-08 19:16:15 +000062split_whitespace(s, len, maxsplit)
Guido van Rossume270b431992-09-03 20:21:07 +000063 char *s;
Guido van Rossum009e79b1995-05-03 17:40:23 +000064 int len;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000065 int maxsplit;
Guido van Rossum009e79b1995-05-03 17:40:23 +000066{
Barry Warsawe8fc29c1997-01-03 22:45:34 +000067 int i = 0, j, err;
68 int countsplit = 0;
69 PyObject* item;
70 PyObject *list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +000071
Guido van Rossume270b431992-09-03 20:21:07 +000072 if (list == NULL)
73 return NULL;
74
Guido van Rossume270b431992-09-03 20:21:07 +000075 while (i < len) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +000076 while (i < len && isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000077 i = i+1;
78 }
79 j = i;
Guido van Rossumee1813d1995-02-14 00:58:59 +000080 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
Guido van Rossume270b431992-09-03 20:21:07 +000081 i = i+1;
82 }
83 if (j < i) {
Barry Warsawf5256011996-12-09 18:35:56 +000084 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Barry Warsawe8fc29c1997-01-03 22:45:34 +000085 if (item == NULL)
86 goto finally;
87
Barry Warsawf5256011996-12-09 18:35:56 +000088 err = PyList_Append(list, item);
89 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +000090 if (err < 0)
91 goto finally;
Guido van Rossum7999a5c1996-08-08 19:16:15 +000092
93 countsplit++;
Barry Warsaw93be92d1997-12-02 00:29:30 +000094 while (i < len && isspace(Py_CHARMASK(s[i]))) {
95 i = i+1;
96 }
97 if (maxsplit && (countsplit >= maxsplit) && i < len) {
Barry Warsawf5256011996-12-09 18:35:56 +000098 item = PyString_FromStringAndSize(
99 s+i, (int)(len - i));
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000100 if (item == NULL)
101 goto finally;
102
Barry Warsawf5256011996-12-09 18:35:56 +0000103 err = PyList_Append(list, item);
104 Py_DECREF(item);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000105 if (err < 0)
106 goto finally;
107
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000108 i = len;
109 }
Guido van Rossume270b431992-09-03 20:21:07 +0000110 }
111 }
Guido van Rossume270b431992-09-03 20:21:07 +0000112 return list;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000113 finally:
114 Py_DECREF(list);
115 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +0000116}
117
118
Guido van Rossum983c9301997-12-29 19:52:29 +0000119static char splitfields__doc__[] =
Fred Drakee4f13661999-11-04 19:19:48 +0000120"split(s [,sep [,maxsplit]]) -> list of strings\n\
121splitfields(s [,sep [,maxsplit]]) -> list of strings\n\
Guido van Rossum983c9301997-12-29 19:52:29 +0000122\n\
123Return a list of the words in the string s, using sep as the\n\
124delimiter string. If maxsplit is nonzero, splits into at most\n\
Fred Drakee4f13661999-11-04 19:19:48 +0000125maxsplit words. If sep is not specified, any whitespace string\n\
Guido van Rossum983c9301997-12-29 19:52:29 +0000126is a separator. Maxsplit defaults to 0.\n\
127\n\
128(split and splitfields are synonymous)";
129
Barry Warsawf5256011996-12-09 18:35:56 +0000130static PyObject *
Guido van Rossume270b431992-09-03 20:21:07 +0000131strop_splitfields(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000132 PyObject *self; /* Not used */
133 PyObject *args;
Guido van Rossume270b431992-09-03 20:21:07 +0000134{
Guido van Rossum572d2d91993-11-05 10:14:49 +0000135 int len, n, i, j, err;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000136 int splitcount, maxsplit;
Guido van Rossume270b431992-09-03 20:21:07 +0000137 char *s, *sub;
Barry Warsawf5256011996-12-09 18:35:56 +0000138 PyObject *list, *item;
Guido van Rossume270b431992-09-03 20:21:07 +0000139
Guido van Rossum009e79b1995-05-03 17:40:23 +0000140 sub = NULL;
141 n = 0;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000142 splitcount = 0;
143 maxsplit = 0;
Guido van Rossum7e488981998-10-08 02:25:24 +0000144 if (!PyArg_ParseTuple(args, "t#|z#i", &s, &len, &sub, &n, &maxsplit))
Guido van Rossume270b431992-09-03 20:21:07 +0000145 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000146 if (sub == NULL)
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000147 return split_whitespace(s, len, maxsplit);
Guido van Rossume270b431992-09-03 20:21:07 +0000148 if (n == 0) {
Barry Warsawf5256011996-12-09 18:35:56 +0000149 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossume270b431992-09-03 20:21:07 +0000150 return NULL;
151 }
152
Barry Warsawf5256011996-12-09 18:35:56 +0000153 list = PyList_New(0);
Guido van Rossume270b431992-09-03 20:21:07 +0000154 if (list == NULL)
155 return NULL;
156
157 i = j = 0;
158 while (i+n <= len) {
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000159 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000160 item = PyString_FromStringAndSize(s+j, (int)(i-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000161 if (item == NULL)
162 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000163 err = PyList_Append(list, item);
164 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000165 if (err < 0)
166 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000167 i = j = i + n;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000168 splitcount++;
169 if (maxsplit && (splitcount >= maxsplit))
170 break;
Guido van Rossume270b431992-09-03 20:21:07 +0000171 }
172 else
173 i++;
174 }
Barry Warsawf5256011996-12-09 18:35:56 +0000175 item = PyString_FromStringAndSize(s+j, (int)(len-j));
Guido van Rossum572d2d91993-11-05 10:14:49 +0000176 if (item == NULL)
177 goto fail;
Barry Warsawf5256011996-12-09 18:35:56 +0000178 err = PyList_Append(list, item);
179 Py_DECREF(item);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000180 if (err < 0)
181 goto fail;
Guido van Rossume270b431992-09-03 20:21:07 +0000182
183 return list;
Guido van Rossum572d2d91993-11-05 10:14:49 +0000184
185 fail:
Barry Warsawf5256011996-12-09 18:35:56 +0000186 Py_DECREF(list);
Guido van Rossum572d2d91993-11-05 10:14:49 +0000187 return NULL;
Guido van Rossume270b431992-09-03 20:21:07 +0000188}
189
190
Guido van Rossum983c9301997-12-29 19:52:29 +0000191static char joinfields__doc__[] =
192"join(list [,sep]) -> string\n\
193joinfields(list [,sep]) -> string\n\
194\n\
195Return a string composed of the words in list, with\n\
Guido van Rossum23e21e71997-12-29 19:57:36 +0000196intervening occurences of sep. Sep defaults to a single\n\
197space.\n\
Guido van Rossum983c9301997-12-29 19:52:29 +0000198\n\
Guido van Rossum23e21e71997-12-29 19:57:36 +0000199(join and joinfields are synonymous)";
Guido van Rossum983c9301997-12-29 19:52:29 +0000200
Barry Warsawf5256011996-12-09 18:35:56 +0000201static PyObject *
Guido van Rossumc89705d1992-11-26 08:54:07 +0000202strop_joinfields(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000203 PyObject *self; /* Not used */
204 PyObject *args;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000205{
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000206 PyObject *seq;
207 char *sep = NULL;
208 int seqlen, seplen = 0;
209 int i, reslen = 0, slen = 0, sz = 100;
210 PyObject *res = NULL;
211 char* p = NULL;
212 intargfunc getitemfunc;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000213
Guido van Rossum7e488981998-10-08 02:25:24 +0000214 if (!PyArg_ParseTuple(args, "O|t#", &seq, &sep, &seplen))
Guido van Rossumc89705d1992-11-26 08:54:07 +0000215 return NULL;
Guido van Rossum009e79b1995-05-03 17:40:23 +0000216 if (sep == NULL) {
217 sep = " ";
218 seplen = 1;
219 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000220
221 seqlen = PySequence_Length(seq);
222 if (seqlen < 0 && PyErr_Occurred())
223 return NULL;
224
225 if (seqlen == 1) {
226 /* Optimization if there's only one item */
227 PyObject *item = PySequence_GetItem(seq, 0);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000228 if (item && !PyString_Check(item)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000229 PyErr_SetString(PyExc_TypeError,
230 "first argument must be sequence of strings");
Guido van Rossumbf338301998-10-19 13:38:36 +0000231 Py_DECREF(item);
Guido van Rossum1ad1b3f1998-02-06 22:37:12 +0000232 return NULL;
233 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000234 return item;
235 }
236
237 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
238 return NULL;
239 p = PyString_AsString(res);
240
241 /* optimize for lists, since it's the most common case. all others
242 * (tuples and arbitrary sequences) just use the sequence abstract
243 * interface.
Barry Warsaw04d2d151997-01-03 23:46:51 +0000244 */
245 if (PyList_Check(seq)) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000246 for (i = 0; i < seqlen; i++) {
247 PyObject *item = PyList_GET_ITEM(seq, i);
248 if (!PyString_Check(item)) {
249 PyErr_SetString(PyExc_TypeError,
250 "first argument must be sequence of strings");
251 Py_DECREF(res);
252 return NULL;
253 }
254 slen = PyString_GET_SIZE(item);
255 while (reslen + slen + seplen >= sz) {
256 if (_PyString_Resize(&res, sz * 2)) {
257 Py_DECREF(res);
258 return NULL;
259 }
260 sz *= 2;
261 p = PyString_AsString(res) + reslen;
262 }
263 if (i > 0) {
264 memcpy(p, sep, seplen);
265 p += seplen;
266 reslen += seplen;
267 }
268 memcpy(p, PyString_AS_STRING(item), slen);
269 p += slen;
270 reslen += slen;
271 }
272 if (_PyString_Resize(&res, reslen)) {
273 Py_DECREF(res);
274 res = NULL;
275 }
276 return res;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000277 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000278
279 if (seq->ob_type->tp_as_sequence == NULL ||
280 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
281 {
Barry Warsawf5256011996-12-09 18:35:56 +0000282 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000283 "first argument must be a sequence");
Guido van Rossumc89705d1992-11-26 08:54:07 +0000284 return NULL;
285 }
Guido van Rossum7df115d1998-05-22 00:53:47 +0000286 /* This is now type safe */
Guido van Rossumc89705d1992-11-26 08:54:07 +0000287 for (i = 0; i < seqlen; i++) {
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000288 PyObject *item = getitemfunc(seq, i);
289 if (!item || !PyString_Check(item)) {
Barry Warsawf5256011996-12-09 18:35:56 +0000290 PyErr_SetString(PyExc_TypeError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000291 "first argument must be sequence of strings");
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000292 Py_DECREF(res);
293 Py_XDECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000294 return NULL;
295 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000296 slen = PyString_GET_SIZE(item);
297 while (reslen + slen + seplen >= sz) {
298 if (_PyString_Resize(&res, sz * 2)) {
299 Py_DECREF(res);
300 Py_DECREF(item);
301 return NULL;
302 }
303 sz *= 2;
304 p = PyString_AsString(res) + reslen;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000305 }
Guido van Rossumc89705d1992-11-26 08:54:07 +0000306 if (i > 0) {
307 memcpy(p, sep, seplen);
308 p += seplen;
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000309 reslen += seplen;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000310 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000311 memcpy(p, PyString_AS_STRING(item), slen);
312 p += slen;
313 reslen += slen;
314 Py_DECREF(item);
Guido van Rossumc89705d1992-11-26 08:54:07 +0000315 }
Barry Warsawd4ff1b91997-01-06 22:48:32 +0000316 if (_PyString_Resize(&res, reslen)) {
317 Py_DECREF(res);
318 res = NULL;
Guido van Rossumc89705d1992-11-26 08:54:07 +0000319 }
320 return res;
321}
322
Guido van Rossum983c9301997-12-29 19:52:29 +0000323
324static char find__doc__[] =
325"find(s, sub [,start [,end]]) -> in\n\
326\n\
327Return the lowest index in s where substring sub is found,\n\
328such that sub is contained within s[start,end]. Optional\n\
329arguments start and end are interpreted as in slice notation.\n\
330\n\
331Return -1 on failure.";
332
Barry Warsawf5256011996-12-09 18:35:56 +0000333static PyObject *
Guido van Rossum5806a4f1994-08-17 13:15:46 +0000334strop_find(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000335 PyObject *self; /* Not used */
336 PyObject *args;
Guido van Rossume270b431992-09-03 20:21:07 +0000337{
338 char *s, *sub;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000339 int len, n, i = 0, last = INT_MAX;
Guido van Rossume270b431992-09-03 20:21:07 +0000340
Guido van Rossum7e488981998-10-08 02:25:24 +0000341 if (!PyArg_ParseTuple(args, "t#t#|ii", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000342 return NULL;
343
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000344 if (last > len)
345 last = len;
346 if (last < 0)
347 last += len;
348 if (last < 0)
349 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000350 if (i < 0)
351 i += len;
352 if (i < 0)
Guido van Rossume270b431992-09-03 20:21:07 +0000353 i = 0;
Guido van Rossume270b431992-09-03 20:21:07 +0000354
Guido van Rossum031c6311998-03-24 04:19:22 +0000355 if (n == 0 && i <= last)
Barry Warsawf5256011996-12-09 18:35:56 +0000356 return PyInt_FromLong((long)i);
Guido van Rossume270b431992-09-03 20:21:07 +0000357
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000358 last -= n;
359 for (; i <= last; ++i)
Guido van Rossumee9012f1993-10-26 15:23:55 +0000360 if (s[i] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000361 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000362 return PyInt_FromLong((long)i);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000363
Barry Warsawf5256011996-12-09 18:35:56 +0000364 return PyInt_FromLong(-1L);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000365}
366
367
Guido van Rossum983c9301997-12-29 19:52:29 +0000368static char rfind__doc__[] =
369"rfind(s, sub [,start [,end]]) -> int\n\
370\n\
371Return the highest index in s where substring sub is found,\n\
372such that sub is contained within s[start,end]. Optional\n\
373arguments start and end are interpreted as in slice notation.\n\
374\n\
375Return -1 on failure.";
376
Barry Warsawf5256011996-12-09 18:35:56 +0000377static PyObject *
Guido van Rossum5806a4f1994-08-17 13:15:46 +0000378strop_rfind(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000379 PyObject *self; /* Not used */
380 PyObject *args;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000381{
382 char *s, *sub;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000383 int len, n, j;
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000384 int i = 0, last = INT_MAX;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000385
Guido van Rossum7e488981998-10-08 02:25:24 +0000386 if (!PyArg_ParseTuple(args, "t#t#|ii", &s, &len, &sub, &n, &i, &last))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000387 return NULL;
388
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000389 if (last > len)
390 last = len;
391 if (last < 0)
392 last += len;
393 if (last < 0)
394 last = 0;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000395 if (i < 0)
396 i += len;
397 if (i < 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000398 i = 0;
Guido van Rossumee9012f1993-10-26 15:23:55 +0000399
Guido van Rossum031c6311998-03-24 04:19:22 +0000400 if (n == 0 && i <= last)
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000401 return PyInt_FromLong((long)last);
Guido van Rossumee9012f1993-10-26 15:23:55 +0000402
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000403 for (j = last-n; j >= i; --j)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000404 if (s[j] == sub[0] &&
Guido van Rossuma0ca4c41996-10-04 13:39:37 +0000405 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
Barry Warsawf5256011996-12-09 18:35:56 +0000406 return PyInt_FromLong((long)j);
Guido van Rossume270b431992-09-03 20:21:07 +0000407
Barry Warsawf5256011996-12-09 18:35:56 +0000408 return PyInt_FromLong(-1L);
Guido van Rossume270b431992-09-03 20:21:07 +0000409}
410
Guido van Rossum983c9301997-12-29 19:52:29 +0000411
Barry Warsawf5256011996-12-09 18:35:56 +0000412static PyObject *
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000413do_strip(args, striptype)
Barry Warsawf5256011996-12-09 18:35:56 +0000414 PyObject *args;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000415 int striptype;
416{
417 char *s;
418 int len, i, j;
419
420
Guido van Rossum7e488981998-10-08 02:25:24 +0000421 if (!PyArg_Parse(args, "t#", &s, &len))
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000422 return NULL;
423
424 i = 0;
425 if (striptype != RIGHTSTRIP) {
426 while (i < len && isspace(Py_CHARMASK(s[i]))) {
427 i++;
428 }
429 }
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000430
431 j = len;
432 if (striptype != LEFTSTRIP) {
433 do {
434 j--;
435 } while (j >= i && isspace(Py_CHARMASK(s[j])));
436 j++;
437 }
438
439 if (i == 0 && j == len) {
Barry Warsawf5256011996-12-09 18:35:56 +0000440 Py_INCREF(args);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000441 return args;
442 }
443 else
Barry Warsawf5256011996-12-09 18:35:56 +0000444 return PyString_FromStringAndSize(s+i, j-i);
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000445}
446
Guido van Rossume270b431992-09-03 20:21:07 +0000447
Guido van Rossum983c9301997-12-29 19:52:29 +0000448static char strip__doc__[] =
449"strip(s) -> string\n\
450\n\
451Return a copy of the string s with leading and trailing\n\
452whitespace removed.";
453
Barry Warsawf5256011996-12-09 18:35:56 +0000454static PyObject *
Guido van Rossume270b431992-09-03 20:21:07 +0000455strop_strip(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000456 PyObject *self; /* Not used */
457 PyObject *args;
Guido van Rossume270b431992-09-03 20:21:07 +0000458{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000459 return do_strip(args, BOTHSTRIP);
460}
Guido van Rossume270b431992-09-03 20:21:07 +0000461
Guido van Rossum983c9301997-12-29 19:52:29 +0000462
463static char lstrip__doc__[] =
464"lstrip(s) -> string\n\
465\n\
466Return a copy of the string s with leading whitespace removed.";
467
Barry Warsawf5256011996-12-09 18:35:56 +0000468static PyObject *
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000469strop_lstrip(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000470 PyObject *self; /* Not used */
471 PyObject *args;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000472{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000473 return do_strip(args, LEFTSTRIP);
474}
Guido van Rossume270b431992-09-03 20:21:07 +0000475
Guido van Rossum983c9301997-12-29 19:52:29 +0000476
477static char rstrip__doc__[] =
478"rstrip(s) -> string\n\
479\n\
480Return a copy of the string s with trailing whitespace removed.";
481
Barry Warsawf5256011996-12-09 18:35:56 +0000482static PyObject *
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000483strop_rstrip(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000484 PyObject *self; /* Not used */
485 PyObject *args;
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000486{
Guido van Rossum7999a5c1996-08-08 19:16:15 +0000487 return do_strip(args, RIGHTSTRIP);
Guido van Rossume270b431992-09-03 20:21:07 +0000488}
489
490
Guido van Rossum983c9301997-12-29 19:52:29 +0000491static char lower__doc__[] =
492"lower(s) -> string\n\
493\n\
494Return a copy of the string s converted to lowercase.";
495
Barry Warsawf5256011996-12-09 18:35:56 +0000496static PyObject *
Barry Warsaw04d2d151997-01-03 23:46:51 +0000497strop_lower(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000498 PyObject *self; /* Not used */
499 PyObject *args;
Guido van Rossum5c850621992-09-11 23:55:51 +0000500{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000501 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000502 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000503 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000504 int changed;
505
Guido van Rossum7e488981998-10-08 02:25:24 +0000506 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000507 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000508 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000509 if (new == NULL)
510 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000511 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000512 changed = 0;
513 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000514 int c = Py_CHARMASK(*s++);
Barry Warsaw04d2d151997-01-03 23:46:51 +0000515 if (isupper(c)) {
Guido van Rossum5c850621992-09-11 23:55:51 +0000516 changed = 1;
Barry Warsaw04d2d151997-01-03 23:46:51 +0000517 *s_new = tolower(c);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000518 } else
519 *s_new = c;
520 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000521 }
522 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000523 Py_DECREF(new);
524 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000525 return args;
526 }
527 return new;
528}
529
530
Guido van Rossum983c9301997-12-29 19:52:29 +0000531static char upper__doc__[] =
532"upper(s) -> string\n\
533\n\
534Return a copy of the string s converted to uppercase.";
535
Barry Warsawf5256011996-12-09 18:35:56 +0000536static PyObject *
Guido van Rossum5c850621992-09-11 23:55:51 +0000537strop_upper(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000538 PyObject *self; /* Not used */
539 PyObject *args;
Guido van Rossum5c850621992-09-11 23:55:51 +0000540{
Barry Warsaw04d2d151997-01-03 23:46:51 +0000541 char *s, *s_new;
542 int i, n;
543 PyObject *new;
544 int changed;
545
Guido van Rossum7e488981998-10-08 02:25:24 +0000546 if (!PyArg_Parse(args, "t#", &s, &n))
Barry Warsaw04d2d151997-01-03 23:46:51 +0000547 return NULL;
548 new = PyString_FromStringAndSize(NULL, n);
549 if (new == NULL)
550 return NULL;
551 s_new = PyString_AsString(new);
552 changed = 0;
553 for (i = 0; i < n; i++) {
554 int c = Py_CHARMASK(*s++);
555 if (islower(c)) {
556 changed = 1;
557 *s_new = toupper(c);
558 } else
559 *s_new = c;
560 s_new++;
561 }
562 if (!changed) {
563 Py_DECREF(new);
564 Py_INCREF(args);
565 return args;
566 }
567 return new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000568}
569
570
Guido van Rossum983c9301997-12-29 19:52:29 +0000571static char capitalize__doc__[] =
572"capitalize(s) -> string\n\
573\n\
574Return a copy of the string s with only its first character\n\
575capitalized.";
576
Barry Warsawf5256011996-12-09 18:35:56 +0000577static PyObject *
Guido van Rossum27457531996-06-12 04:24:52 +0000578strop_capitalize(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000579 PyObject *self; /* Not used */
580 PyObject *args;
Guido van Rossum27457531996-06-12 04:24:52 +0000581{
582 char *s, *s_new;
583 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000584 PyObject *new;
Guido van Rossum27457531996-06-12 04:24:52 +0000585 int changed;
586
Guido van Rossum7e488981998-10-08 02:25:24 +0000587 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum27457531996-06-12 04:24:52 +0000588 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000589 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum27457531996-06-12 04:24:52 +0000590 if (new == NULL)
591 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000592 s_new = PyString_AsString(new);
Guido van Rossum27457531996-06-12 04:24:52 +0000593 changed = 0;
Guido van Rossum529c9631996-06-17 16:59:33 +0000594 if (0 < n) {
Guido van Rossum27457531996-06-12 04:24:52 +0000595 int c = Py_CHARMASK(*s++);
596 if (islower(c)) {
597 changed = 1;
598 *s_new = toupper(c);
599 } else
600 *s_new = c;
601 s_new++;
602 }
603 for (i = 1; i < n; i++) {
604 int c = Py_CHARMASK(*s++);
605 if (isupper(c)) {
606 changed = 1;
607 *s_new = tolower(c);
608 } else
609 *s_new = c;
610 s_new++;
611 }
612 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000613 Py_DECREF(new);
614 Py_INCREF(args);
Guido van Rossum27457531996-06-12 04:24:52 +0000615 return args;
616 }
617 return new;
618}
619
620
Guido van Rossum54ec2881999-01-25 22:36:24 +0000621static char expandtabs__doc__[] =
622"expandtabs(string, [tabsize]) -> string\n\
623\n\
Guido van Rossum54ec2881999-01-25 22:36:24 +0000624Expand tabs in a string, i.e. replace them by one or more spaces,\n\
625depending on the current column and the given tab size (default 8).\n\
626The column number is reset to zero after each newline occurring in the\n\
627string. This doesn't understand other non-printing characters.";
628
629static PyObject *
630strop_expandtabs(self, args)
631 PyObject *self;
632 PyObject *args;
633{
634 /* Original by Fredrik Lundh */
635 char* e;
636 char* p;
637 char* q;
638 int i, j;
639 PyObject* out;
640 char* string;
641 int stringlen;
642 int tabsize = 8;
643
644 /* Get arguments */
645 if (!PyArg_ParseTuple(args, "s#|i", &string, &stringlen, &tabsize))
646 return NULL;
647 if (tabsize < 1) {
648 PyErr_SetString(PyExc_ValueError,
649 "tabsize must be at least 1");
650 return NULL;
651 }
652
653 /* First pass: determine size of output string */
654 i = j = 0; /* j: current column; i: total of previous lines */
655 e = string + stringlen;
656 for (p = string; p < e; p++) {
657 if (*p == '\t')
658 j += tabsize - (j%tabsize);
659 else {
660 j++;
661 if (*p == '\n') {
662 i += j;
663 j = 0;
664 }
665 }
666 }
667
668 /* Second pass: create output string and fill it */
669 out = PyString_FromStringAndSize(NULL, i+j);
670 if (out == NULL)
671 return NULL;
672
673 i = 0;
674 q = PyString_AS_STRING(out);
675
676 for (p = string; p < e; p++) {
677 if (*p == '\t') {
678 j = tabsize - (i%tabsize);
679 i += j;
680 while (j-- > 0)
681 *q++ = ' ';
682 } else {
683 *q++ = *p;
684 i++;
685 if (*p == '\n')
686 i = 0;
687 }
688 }
689
690 return out;
691}
692
693
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000694static char count__doc__[] =
695"count(s, sub[, start[, end]]) -> int\n\
696\n\
697Return the number of occurrences of substring sub in string\n\
698s[start:end]. Optional arguments start and end are\n\
699interpreted as in slice notation.";
700
701static PyObject *
702strop_count(self, args)
703 PyObject *self; /* Not used */
704 PyObject *args;
705{
706 char *s, *sub;
Guido van Rossumc5015831998-10-07 16:36:14 +0000707 int len, n;
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000708 int i = 0, last = INT_MAX;
709 int m, r;
710
Guido van Rossum7e488981998-10-08 02:25:24 +0000711 if (!PyArg_ParseTuple(args, "t#t#|ii", &s, &len, &sub, &n, &i, &last))
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +0000712 return NULL;
713 if (last > len)
714 last = len;
715 if (last < 0)
716 last += len;
717 if (last < 0)
718 last = 0;
719 if (i < 0)
720 i += len;
721 if (i < 0)
722 i = 0;
723 m = last + 1 - n;
724 if (n == 0)
725 return PyInt_FromLong((long) (m-i));
726
727 r = 0;
728 while (i < m) {
729 if (!memcmp(s+i, sub, n)) {
730 r++;
731 i += n;
732 } else {
733 i++;
734 }
735 }
736 return PyInt_FromLong((long) r);
737}
738
739
Guido van Rossum983c9301997-12-29 19:52:29 +0000740static char swapcase__doc__[] =
741"swapcase(s) -> string\n\
742\n\
743Return a copy of the string s with upper case characters\n\
744converted to lowercase and vice versa.";
745
Barry Warsawf5256011996-12-09 18:35:56 +0000746static PyObject *
Guido van Rossum5c850621992-09-11 23:55:51 +0000747strop_swapcase(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000748 PyObject *self; /* Not used */
749 PyObject *args;
Guido van Rossum5c850621992-09-11 23:55:51 +0000750{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000751 char *s, *s_new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000752 int i, n;
Barry Warsawf5256011996-12-09 18:35:56 +0000753 PyObject *new;
Guido van Rossum5c850621992-09-11 23:55:51 +0000754 int changed;
755
Guido van Rossum7e488981998-10-08 02:25:24 +0000756 if (!PyArg_Parse(args, "t#", &s, &n))
Guido van Rossum5c850621992-09-11 23:55:51 +0000757 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000758 new = PyString_FromStringAndSize(NULL, n);
Guido van Rossum5c850621992-09-11 23:55:51 +0000759 if (new == NULL)
760 return NULL;
Barry Warsawf5256011996-12-09 18:35:56 +0000761 s_new = PyString_AsString(new);
Guido van Rossum5c850621992-09-11 23:55:51 +0000762 changed = 0;
763 for (i = 0; i < n; i++) {
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000764 int c = Py_CHARMASK(*s++);
Guido van Rossum5c850621992-09-11 23:55:51 +0000765 if (islower(c)) {
766 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000767 *s_new = toupper(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000768 }
769 else if (isupper(c)) {
770 changed = 1;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000771 *s_new = tolower(c);
Guido van Rossum5c850621992-09-11 23:55:51 +0000772 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000773 else
774 *s_new = c;
775 s_new++;
Guido van Rossum5c850621992-09-11 23:55:51 +0000776 }
777 if (!changed) {
Barry Warsawf5256011996-12-09 18:35:56 +0000778 Py_DECREF(new);
779 Py_INCREF(args);
Guido van Rossum5c850621992-09-11 23:55:51 +0000780 return args;
781 }
782 return new;
783}
784
785
Guido van Rossum983c9301997-12-29 19:52:29 +0000786static char atoi__doc__[] =
787"atoi(s [,base]) -> int\n\
788\n\
789Return the integer represented by the string s in the given\n\
790base, which defaults to 10. The string s must consist of one\n\
791or more digits, possibly preceded by a sign. If base is 0, it\n\
792is chosen from the leading characters of s, 0 for octal, 0x or\n\
7930X for hexadecimal. If base is 16, a preceding 0x or 0X is\n\
794accepted.";
795
Barry Warsawf5256011996-12-09 18:35:56 +0000796static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000797strop_atoi(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000798 PyObject *self; /* Not used */
799 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000800{
Guido van Rossumb6775db1994-08-01 11:34:53 +0000801 char *s, *end;
802 int base = 10;
803 long x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000804 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000805
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000806 if (!PyArg_ParseTuple(args, "s|i", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000807 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000808
809 if ((base != 0 && base < 2) || base > 36) {
810 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
811 return NULL;
812 }
813
Guido van Rossumc35f9331996-09-11 23:30:42 +0000814 while (*s && isspace(Py_CHARMASK(*s)))
815 s++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000816 errno = 0;
817 if (base == 0 && s[0] == '0')
Barry Warsawf5256011996-12-09 18:35:56 +0000818 x = (long) PyOS_strtoul(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000819 else
Barry Warsawf5256011996-12-09 18:35:56 +0000820 x = PyOS_strtol(s, &end, base);
Guido van Rossum5bd69db1999-02-22 16:18:44 +0000821 if (end == s || !isalnum(end[-1]))
Guido van Rossum923fece51998-08-04 15:04:52 +0000822 goto bad;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000823 while (*end && isspace(Py_CHARMASK(*end)))
824 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000825 if (*end != '\0') {
Guido van Rossum923fece51998-08-04 15:04:52 +0000826 bad:
Guido van Rossumc35f9331996-09-11 23:30:42 +0000827 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000828 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000829 return NULL;
830 }
831 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000832 sprintf(buffer, "atoi() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000833 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000834 return NULL;
835 }
Barry Warsawf5256011996-12-09 18:35:56 +0000836 return PyInt_FromLong(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000837}
838
839
Guido van Rossum983c9301997-12-29 19:52:29 +0000840static char atol__doc__[] =
841"atol(s [,base]) -> long\n\
842\n\
843Return the long integer represented by the string s in the\n\
844given base, which defaults to 10. The string s must consist\n\
845of one or more digits, possibly preceded by a sign. If base\n\
846is 0, it is chosen from the leading characters of s, 0 for\n\
847octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
8480x or 0X is accepted. A trailing L or l is not accepted,\n\
849unless base is 0.";
850
Barry Warsawf5256011996-12-09 18:35:56 +0000851static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000852strop_atol(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000853 PyObject *self; /* Not used */
854 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000855{
856 char *s, *end;
857 int base = 10;
Barry Warsawf5256011996-12-09 18:35:56 +0000858 PyObject *x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000859 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000860
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000861 if (!PyArg_ParseTuple(args, "s|i", &s, &base))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000862 return NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000863
864 if ((base != 0 && base < 2) || base > 36) {
865 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
866 return NULL;
867 }
868
Guido van Rossumc35f9331996-09-11 23:30:42 +0000869 while (*s && isspace(Py_CHARMASK(*s)))
870 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000871 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000872 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000873 return NULL;
874 }
Barry Warsawf5256011996-12-09 18:35:56 +0000875 x = PyLong_FromString(s, &end, base);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000876 if (x == NULL)
877 return NULL;
878 if (base == 0 && (*end == 'l' || *end == 'L'))
879 end++;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000880 while (*end && isspace(Py_CHARMASK(*end)))
881 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000882 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000883 sprintf(buffer, "invalid literal for atol(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000884 PyErr_SetString(PyExc_ValueError, buffer);
885 Py_DECREF(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000886 return NULL;
887 }
888 return x;
889}
890
891
Guido van Rossum983c9301997-12-29 19:52:29 +0000892static char atof__doc__[] =
893"atof(s) -> float\n\
894\n\
895Return the floating point number represented by the string s.";
896
Barry Warsawf5256011996-12-09 18:35:56 +0000897static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000898strop_atof(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000899 PyObject *self; /* Not used */
900 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000901{
Barry Warsawf5256011996-12-09 18:35:56 +0000902 extern double strtod Py_PROTO((const char *, char **));
Guido van Rossumb6775db1994-08-01 11:34:53 +0000903 char *s, *end;
904 double x;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000905 char buffer[256]; /* For errors */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000906
Guido van Rossum74608f81997-12-30 05:44:10 +0000907 if (!PyArg_ParseTuple(args, "s", &s))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000908 return NULL;
Guido van Rossumc35f9331996-09-11 23:30:42 +0000909 while (*s && isspace(Py_CHARMASK(*s)))
910 s++;
Guido van Rossum171191e1996-08-21 20:02:25 +0000911 if (s[0] == '\0') {
Barry Warsawf5256011996-12-09 18:35:56 +0000912 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
Guido van Rossum171191e1996-08-21 20:02:25 +0000913 return NULL;
914 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000915 errno = 0;
Guido van Rossum52fa3a61997-02-14 22:59:58 +0000916 PyFPE_START_PROTECT("strop_atof", return 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000917 x = strtod(s, &end);
Guido van Rossum7b7c5781997-03-14 04:13:56 +0000918 PyFPE_END_PROTECT(x)
Guido van Rossumc35f9331996-09-11 23:30:42 +0000919 while (*end && isspace(Py_CHARMASK(*end)))
920 end++;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000921 if (*end != '\0') {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000922 sprintf(buffer, "invalid literal for atof(): %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000923 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000924 return NULL;
925 }
926 else if (errno != 0) {
Guido van Rossumc35f9331996-09-11 23:30:42 +0000927 sprintf(buffer, "atof() literal too large: %.200s", s);
Barry Warsawf5256011996-12-09 18:35:56 +0000928 PyErr_SetString(PyExc_ValueError, buffer);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000929 return NULL;
930 }
Barry Warsawf5256011996-12-09 18:35:56 +0000931 return PyFloat_FromDouble(x);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000932}
933
934
Guido van Rossum983c9301997-12-29 19:52:29 +0000935static char maketrans__doc__[] =
936"maketrans(frm, to) -> string\n\
937\n\
938Return a translation table (a string of 256 bytes long)\n\
939suitable for use in string.translate. The strings frm and to\n\
940must be of the same length.";
941
Guido van Rossumed7253c1996-07-23 18:12:39 +0000942static PyObject *
943strop_maketrans(self, args)
944 PyObject *self; /* Not used */
945 PyObject *args;
946{
Guido van Rossume0548b81997-01-06 16:50:09 +0000947 unsigned char *c, *from=NULL, *to=NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000948 int i, fromlen=0, tolen=0;
Guido van Rossume0548b81997-01-06 16:50:09 +0000949 PyObject *result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000950
Guido van Rossum7e488981998-10-08 02:25:24 +0000951 if (!PyArg_ParseTuple(args, "t#t#", &from, &fromlen, &to, &tolen))
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000952 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000953
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000954 if (fromlen != tolen) {
Barry Warsawf5256011996-12-09 18:35:56 +0000955 PyErr_SetString(PyExc_ValueError,
Guido van Rossumed7253c1996-07-23 18:12:39 +0000956 "maketrans arguments must have same length");
957 return NULL;
958 }
Guido van Rossume0548b81997-01-06 16:50:09 +0000959
960 result = PyString_FromStringAndSize((char *)NULL, 256);
961 if (result == NULL)
962 return NULL;
963 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000964 for (i = 0; i < 256; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000965 c[i]=(unsigned char)i;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000966 for (i = 0; i < fromlen; i++)
Guido van Rossumed7253c1996-07-23 18:12:39 +0000967 c[from[i]]=to[i];
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000968
Guido van Rossume0548b81997-01-06 16:50:09 +0000969 return result;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000970}
971
972
Guido van Rossum983c9301997-12-29 19:52:29 +0000973static char translate__doc__[] =
974"translate(s,table [,deletechars]) -> string\n\
975\n\
976Return a copy of the string s, where all characters occurring\n\
977in the optional argument deletechars are removed, and the\n\
978remaining characters have been mapped through the given\n\
979translation table, which must be a string of length 256.";
980
Barry Warsawf5256011996-12-09 18:35:56 +0000981static PyObject *
Guido van Rossuma3127e81995-09-13 17:39:06 +0000982strop_translate(self, args)
Barry Warsawf5256011996-12-09 18:35:56 +0000983 PyObject *self;
984 PyObject *args;
Guido van Rossuma3127e81995-09-13 17:39:06 +0000985{
Guido van Rossume0548b81997-01-06 16:50:09 +0000986 register char *input, *table, *output;
987 register int i, c, changed = 0;
988 PyObject *input_obj;
989 char *table1, *output_start, *del_table=NULL;
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000990 int inlen, tablen, dellen = 0;
Guido van Rossumed7253c1996-07-23 18:12:39 +0000991 PyObject *result;
Guido van Rossume0548b81997-01-06 16:50:09 +0000992 int trans_table[256];
Guido van Rossuma3127e81995-09-13 17:39:06 +0000993
Guido van Rossum7e488981998-10-08 02:25:24 +0000994 if (!PyArg_ParseTuple(args, "St#|t#", &input_obj,
Guido van Rossume0548b81997-01-06 16:50:09 +0000995 &table1, &tablen, &del_table, &dellen))
Guido van Rossuma3127e81995-09-13 17:39:06 +0000996 return NULL;
997 if (tablen != 256) {
Barry Warsawf5256011996-12-09 18:35:56 +0000998 PyErr_SetString(PyExc_ValueError,
Barry Warsawe8fc29c1997-01-03 22:45:34 +0000999 "translation table must be 256 characters long");
Guido van Rossuma3127e81995-09-13 17:39:06 +00001000 return NULL;
1001 }
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001002
Guido van Rossume0548b81997-01-06 16:50:09 +00001003 table = table1;
1004 inlen = PyString_Size(input_obj);
Guido van Rossumed7253c1996-07-23 18:12:39 +00001005 result = PyString_FromStringAndSize((char *)NULL, inlen);
Guido van Rossuma3127e81995-09-13 17:39:06 +00001006 if (result == NULL)
1007 return NULL;
Guido van Rossumed7253c1996-07-23 18:12:39 +00001008 output_start = output = PyString_AsString(result);
Guido van Rossume0548b81997-01-06 16:50:09 +00001009 input = PyString_AsString(input_obj);
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001010
Guido van Rossume0548b81997-01-06 16:50:09 +00001011 if (dellen == 0) {
1012 /* If no deletions are required, use faster code */
1013 for (i = inlen; --i >= 0; ) {
1014 c = Py_CHARMASK(*input++);
1015 if (Py_CHARMASK((*output++ = table[c])) != c)
1016 changed = 1;
Guido van Rossumed7253c1996-07-23 18:12:39 +00001017 }
Guido van Rossume0548b81997-01-06 16:50:09 +00001018 if (changed)
1019 return result;
1020 Py_DECREF(result);
1021 Py_INCREF(input_obj);
1022 return input_obj;
Guido van Rossuma3127e81995-09-13 17:39:06 +00001023 }
Guido van Rossume0548b81997-01-06 16:50:09 +00001024
1025 for (i = 0; i < 256; i++)
1026 trans_table[i] = Py_CHARMASK(table[i]);
1027
Guido van Rossum983c9301997-12-29 19:52:29 +00001028 for (i = 0; i < dellen; i++)
Guido van Rossum1ed5e571997-04-29 21:34:16 +00001029 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Guido van Rossume0548b81997-01-06 16:50:09 +00001030
1031 for (i = inlen; --i >= 0; ) {
1032 c = Py_CHARMASK(*input++);
1033 if (trans_table[c] != -1)
1034 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1035 continue;
1036 changed = 1;
1037 }
1038 if (!changed) {
1039 Py_DECREF(result);
1040 Py_INCREF(input_obj);
1041 return input_obj;
1042 }
1043 /* Fix the size of the resulting string */
1044 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
Guido van Rossum983c9301997-12-29 19:52:29 +00001045 return NULL;
Guido van Rossuma3127e81995-09-13 17:39:06 +00001046 return result;
1047}
1048
1049
Guido van Rossum101923b1997-04-02 06:11:18 +00001050/* What follows is used for implementing replace(). Perry Stoll. */
1051
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM (LEN
  bytes) of the contents of memory pointed to by PAT (PAT_LEN bytes).
  Returns the index into MEM if found, or -1 if not found.  If len of
  PAT is greater than length of MEM, the function returns -1.

  A PAT_LEN of zero or less is reported as "not found" (-1) without
  touching PAT: there is no first byte to compare, and a search that
  "succeeds" without consuming anything would never let a counting
  loop advance.
*/
static int mymemfind(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	register int ii;

	/* Without this guard pat[0] would be read past an empty
	   pattern and memcmp() would be handed pat_len-1 == -1,
	   i.e. a huge unsigned length. */
	if (pat_len <= 0)
		return -1;

	/* pattern can not occur in the last pat_len-1 chars */
	len -= pat_len;

	for (ii = 0; ii <= len; ii++) {
		/* cheap first-byte test before the full comparison */
		if (mem[ii] == pat[0] &&
		    (pat_len == 1 ||
		     memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
			return ii;
		}
	}
	return -1;
}
1082
/*
  mymemcnt

  Count how many times PAT is found in MEM when scanning left to
  right and resuming each search just past the previous match
  (so matches never overlap):
     mem=1111  and pat==11 gives 2,
     mem=11111 and pat==11 also gives 2.
 */
static int mymemcnt(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	int hits = 0;
	int at, step;

	/* the counter is bumped only after a successful search;
	   the break below skips the increment */
	for (; len >= 0; hits++) {
		at = mymemfind(mem, len, pat, pat_len);
		if (at == -1)
			break;
		/* continue right behind the match just found */
		step = at + pat_len;
		mem += step;
		len -= step;
	}
	return hits;
}
1109
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR, if STR or PAT is
   empty, or if there are no occurrences of PAT in STR, then the
   original string is returned.  Otherwise, a new string is allocated
   here with malloc() and returned; the caller must free() it.

   COUNT limits the number of replacements when it is greater than
   zero; zero means "replace all".

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.

   The returned buffer is not NUL-terminated and may have length 0
   (every byte of STR replaced by an empty SUB); it is still a
   distinct, freeable allocation in that case.
*/
static char *mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
	char *str;
	int len;     /* input string */
	char *pat;
	int pat_len; /* pattern string to find */
	char *sub;
	int sub_len; /* substitution string */
	int count;   /* number of replacements, 0 == all */
	int *out_len;

{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	/* nothing to search in, nothing to search for, or the pattern
	   cannot possibly fit */
	if (len == 0 || pat_len <= 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count > 0)
		nfound = nfound > count ? count : nfound;
	if (nfound == 0)
		goto return_same;
	new_len = len + nfound*(sub_len - pat_len);

	/* malloc(0) is allowed to return NULL, which the caller would
	   mistake for an out-of-memory condition; always ask for at
	   least one byte so an empty result is still a valid buffer. */
	new_s = (char *)malloc(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements; with
		   count == 0 this never triggers, so all are replaced */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1189
1190
Guido van Rossum983c9301997-12-29 19:52:29 +00001191static char replace__doc__[] =
1192"replace (str, old, new[, maxsplit]) -> string\n\
1193\n\
1194Return a copy of string str with all occurrences of substring\n\
1195old replaced by new. If the optional argument maxsplit is\n\
1196given, only the first maxsplit occurrences are replaced.";
1197
1198static PyObject *
Guido van Rossum101923b1997-04-02 06:11:18 +00001199strop_replace(self, args)
1200 PyObject *self; /* Not used */
1201 PyObject *args;
1202{
1203 char *str, *pat,*sub,*new_s;
1204 int len,pat_len,sub_len,out_len;
Barry Warsawf577c081997-11-29 00:10:07 +00001205 int count = 0;
Guido van Rossum101923b1997-04-02 06:11:18 +00001206 PyObject *new;
1207
Guido van Rossum7e488981998-10-08 02:25:24 +00001208 if (!PyArg_ParseTuple(args, "t#t#t#|i",
Barry Warsawf577c081997-11-29 00:10:07 +00001209 &str, &len, &pat, &pat_len, &sub, &sub_len,
1210 &count))
Guido van Rossum101923b1997-04-02 06:11:18 +00001211 return NULL;
Guido van Rossum4ccda151998-05-14 02:36:29 +00001212 if (pat_len <= 0) {
1213 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1214 return NULL;
1215 }
Barry Warsawf577c081997-11-29 00:10:07 +00001216 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
Guido van Rossum101923b1997-04-02 06:11:18 +00001217 if (new_s == NULL) {
1218 PyErr_NoMemory();
1219 return NULL;
1220 }
1221 if (out_len == -1) {
1222 /* we're returning another reference to the input string */
1223 new = PyTuple_GetItem(args, 0);
1224 Py_XINCREF(new);
1225 }
1226 else {
1227 new = PyString_FromStringAndSize(new_s, out_len);
1228 free(new_s);
1229 }
1230 return new;
1231}
1232
1233
Guido van Rossume270b431992-09-03 20:21:07 +00001234/* List of functions defined in the module */
1235
Barry Warsawe8fc29c1997-01-03 22:45:34 +00001236static PyMethodDef
1237strop_methods[] = {
Guido van Rossum983c9301997-12-29 19:52:29 +00001238 {"atof", strop_atof, 1, atof__doc__},
1239 {"atoi", strop_atoi, 1, atoi__doc__},
1240 {"atol", strop_atol, 1, atol__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001241 {"capitalize", strop_capitalize, 0, capitalize__doc__},
Guido van Rossumd5bcf9a1998-10-06 19:43:14 +00001242 {"count", strop_count, 1, count__doc__},
Guido van Rossum54ec2881999-01-25 22:36:24 +00001243 {"expandtabs", strop_expandtabs, 1, expandtabs__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001244 {"find", strop_find, 1, find__doc__},
1245 {"join", strop_joinfields, 1, joinfields__doc__},
1246 {"joinfields", strop_joinfields, 1, joinfields__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001247 {"lstrip", strop_lstrip, 0, lstrip__doc__},
1248 {"lower", strop_lower, 0, lower__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001249 {"maketrans", strop_maketrans, 1, maketrans__doc__},
1250 {"replace", strop_replace, 1, replace__doc__},
1251 {"rfind", strop_rfind, 1, rfind__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001252 {"rstrip", strop_rstrip, 0, rstrip__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001253 {"split", strop_splitfields, 1, splitfields__doc__},
1254 {"splitfields", strop_splitfields, 1, splitfields__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001255 {"strip", strop_strip, 0, strip__doc__},
1256 {"swapcase", strop_swapcase, 0, swapcase__doc__},
Guido van Rossum983c9301997-12-29 19:52:29 +00001257 {"translate", strop_translate, 1, translate__doc__},
Guido van Rossum74608f81997-12-30 05:44:10 +00001258 {"upper", strop_upper, 0, upper__doc__},
Guido van Rossume270b431992-09-03 20:21:07 +00001259 {NULL, NULL} /* sentinel */
1260};
1261
1262
Guido van Rossum3886bb61998-12-04 18:50:17 +00001263DL_EXPORT(void)
Guido van Rossume270b431992-09-03 20:21:07 +00001264initstrop()
1265{
Barry Warsawf5256011996-12-09 18:35:56 +00001266 PyObject *m, *d, *s;
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001267 char buf[256];
1268 int c, n;
Guido van Rossum983c9301997-12-29 19:52:29 +00001269 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1270 (PyObject*)NULL, PYTHON_API_VERSION);
Barry Warsawf5256011996-12-09 18:35:56 +00001271 d = PyModule_GetDict(m);
Guido van Rossume22e6441993-07-09 10:51:31 +00001272
1273 /* Create 'whitespace' object */
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001274 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001275 for (c = 0; c < 256; c++) {
Guido van Rossumd05eb8b1993-07-08 11:12:36 +00001276 if (isspace(c))
1277 buf[n++] = c;
1278 }
Barry Warsawf5256011996-12-09 18:35:56 +00001279 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001280 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001281 PyDict_SetItemString(d, "whitespace", s);
1282 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001283 }
1284 /* Create 'lowercase' object */
1285 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001286 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001287 if (islower(c))
1288 buf[n++] = c;
1289 }
Barry Warsawf5256011996-12-09 18:35:56 +00001290 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001291 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001292 PyDict_SetItemString(d, "lowercase", s);
1293 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001294 }
1295
1296 /* Create 'uppercase' object */
1297 n = 0;
Guido van Rossum7f7f2741995-02-10 17:01:56 +00001298 for (c = 0; c < 256; c++) {
Guido van Rossume22e6441993-07-09 10:51:31 +00001299 if (isupper(c))
1300 buf[n++] = c;
1301 }
Barry Warsawf5256011996-12-09 18:35:56 +00001302 s = PyString_FromStringAndSize(buf, n);
Guido van Rossume22e6441993-07-09 10:51:31 +00001303 if (s) {
Barry Warsawf5256011996-12-09 18:35:56 +00001304 PyDict_SetItemString(d, "uppercase", s);
1305 Py_DECREF(s);
Guido van Rossume22e6441993-07-09 10:51:31 +00001306 }
1307
Barry Warsawf5256011996-12-09 18:35:56 +00001308 if (PyErr_Occurred())
1309 Py_FatalError("can't initialize module strop");
Guido van Rossume270b431992-09-03 20:21:07 +00001310}