blob: 9b928f5beffd09ade80e487fbfd40cc3cf7739f6 [file] [log] [blame]
Guido van Rossumdb25f321997-07-10 14:31:32 +00001/***********************************************************
2Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
4
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
16
17While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
29
30******************************************************************/
31
32/* $Id$ */
33
34/* Regular expression objects */
35/* This uses Tatu Ylonen's copyleft-free reimplementation of
36 GNU regular expressions */
37
38#include "Python.h"
39
40#include <ctype.h>
41
42#include "regexpr.h"
43
44static PyObject *ReopError; /* Exception */
45
46static PyObject *
47makeresult(regs, num_regs)
48 struct re_registers *regs;
49 int num_regs;
50{
51 PyObject *v;
52 int i;
53 static PyObject *filler = NULL;
54
55 if (filler == NULL) {
56 filler = Py_BuildValue("(ii)", -1, -1);
57 if (filler == NULL)
58 return NULL;
59 }
60 v = PyTuple_New(num_regs);
61 if (v == NULL)
62 return NULL;
63
64 for (i = 0; i < num_regs; i++) {
65 int lo = regs->start[i];
66 int hi = regs->end[i];
67 PyObject *w;
68 if (lo == -1 && hi == -1) {
69 w = filler;
70 Py_INCREF(w);
71 }
72 else
73 w = Py_BuildValue("(ii)", lo, hi);
74 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
75 Py_DECREF(v);
76 return NULL;
77 }
78 }
79 return v;
80}
81
82static PyObject *
83reop_match(self, args)
84 PyObject *self;
85 PyObject *args;
86{
87 char *string;
88 int fastmaplen, stringlen;
89 int can_be_null, anchor, i;
Guido van Rossum04a1d741997-07-15 14:38:13 +000090 int flags, pos, result;
Guido van Rossumdb25f321997-07-10 14:31:32 +000091 struct re_pattern_buffer bufp;
92 struct re_registers re_regs;
93
94 if (!PyArg_Parse(args, "(s#iiis#is#i)",
95 &(bufp.buffer), &(bufp.allocated),
Guido van Rossum04a1d741997-07-15 14:38:13 +000096 &(bufp.num_registers), &flags, &can_be_null,
Guido van Rossumdb25f321997-07-10 14:31:32 +000097 &(bufp.fastmap), &fastmaplen,
98 &anchor,
99 &string, &stringlen,
100 &pos))
101 return NULL;
102
103 /* XXX sanity-check the input data */
104 bufp.used=bufp.allocated;
105 bufp.translate=NULL;
106 bufp.fastmap_accurate=1;
107 bufp.can_be_null=can_be_null;
108 bufp.uses_registers=1;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000109 bufp.anchor=anchor;
110
Guido van Rossum04a1d741997-07-15 14:38:13 +0000111 for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
Guido van Rossumdb25f321997-07-10 14:31:32 +0000112
113 result = re_match(&bufp,
114 string, stringlen, pos,
115 &re_regs);
116 if (result < -1) {
117 /* Failure like stack overflow */
118 PyErr_SetString(ReopError, "match failure");
119 return NULL;
120 }
Guido van Rossum63e18191997-07-11 11:08:38 +0000121 if (result == -1) {
122 Py_INCREF(Py_None);
123 return Py_None;
124 }
Guido van Rossum04a1d741997-07-15 14:38:13 +0000125 return makeresult(&re_regs, bufp.num_registers);
Guido van Rossumdb25f321997-07-10 14:31:32 +0000126}
127
128static PyObject *
129reop_search(self, args)
130 PyObject *self;
131 PyObject *args;
132{
133 char *string;
134 int fastmaplen, stringlen;
135 int can_be_null, anchor, i;
Guido van Rossum04a1d741997-07-15 14:38:13 +0000136 int flags, pos, result;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000137 struct re_pattern_buffer bufp;
138 struct re_registers re_regs;
139
140 if (!PyArg_Parse(args, "(s#iiis#is#i)",
141 &(bufp.buffer), &(bufp.allocated),
Guido van Rossum04a1d741997-07-15 14:38:13 +0000142 &(bufp.num_registers), &flags, &can_be_null,
Guido van Rossumdb25f321997-07-10 14:31:32 +0000143 &(bufp.fastmap), &fastmaplen,
144 &anchor,
145 &string, &stringlen,
146 &pos))
147 return NULL;
148
149 /* XXX sanity-check the input data */
150 bufp.used=bufp.allocated;
151 bufp.translate=NULL;
152 bufp.fastmap_accurate=1;
153 bufp.can_be_null=can_be_null;
154 bufp.uses_registers=1;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000155 bufp.anchor=anchor;
156
Guido van Rossum04a1d741997-07-15 14:38:13 +0000157 for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
Guido van Rossumdb25f321997-07-10 14:31:32 +0000158
159 result = re_search(&bufp,
160 string, stringlen, pos, stringlen-pos,
161 &re_regs);
162 if (result < -1) {
163 /* Failure like stack overflow */
164 PyErr_SetString(ReopError, "match failure");
165 return NULL;
166 }
Guido van Rossum63e18191997-07-11 11:08:38 +0000167 if (result == -1) {
168 Py_INCREF(Py_None);
169 return Py_None;
170 }
Guido van Rossum04a1d741997-07-15 14:38:13 +0000171 return makeresult(&re_regs, bufp.num_registers);
Guido van Rossumdb25f321997-07-10 14:31:32 +0000172}
173
174#if 0
175/* Functions originally in the regsub module.
176 Added June 1, 1997.
177 */
178
179/* A cache of previously used patterns is maintained. Notice that if
180 you change the reop syntax flag, entries in the cache are
181 invalidated.
182 XXX Solution: use (syntax flag, pattern) as keys? Clear the cache
183 every so often, or once it gets past a certain size?
184*/
185
186static PyObject *cache_dict=NULL;
187
188/* Accept an object; if it's a reop pattern, Py_INCREF it and return
189 it. If it's a string, a reop object is compiled and cached.
190*/
191
192static reopobject *
193cached_compile(pattern)
194 PyObject *pattern;
195{
196 reopobject *p2;
197
198 if (!PyString_Check(pattern))
199 {
200 /* It's not a string, so assume it's a compiled reop object */
201 /* XXX check that! */
202 Py_INCREF(pattern);
203 return (reopobject*)pattern;
204 }
205 if (cache_dict==NULL)
206 {
207 cache_dict=PyDict_New();
208 if (cache_dict==NULL)
209 {
210 return (reopobject*)NULL;
211 }
212 }
213
214 /* See if the pattern has already been cached; if so, return that
215 reop object */
216 p2=(reopobject*)PyDict_GetItem(cache_dict, pattern);
217 if (p2)
218 {
219 Py_INCREF(p2);
220 return (reopobject*)p2;
221 }
222
223 /* Compile the pattern and cache it */
224 p2=(reopobject*)newreopobject(pattern, NULL, pattern, NULL);
225 if (!p2) return p2;
226 PyDict_SetItem(cache_dict, pattern, (PyObject*)p2);
227 return p2;
228}
229
230
231static PyObject *
232internal_split(args, retain)
233 PyObject *args;
234 int retain;
235{
236 PyObject *newlist, *s;
237 reopobject *pattern;
238 int maxsplit=0, count=0, length, next=0, result;
239 int match_end=0; /* match_start is defined below */
240 char *start;
241
242 if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
243 &maxsplit))
244 {
245 PyErr_Clear();
246 if (!PyArg_ParseTuple(args, "s#O", &start, &length, &pattern))
247 return NULL;
248 }
249 pattern=cached_compile((PyObject *)pattern);
250 if (!pattern) return NULL;
251
252 newlist=PyList_New(0);
253 if (!newlist) return NULL;
254
255 do
256 {
257 result = re_search(&pattern->re_patbuf,
258 start, length, next, length-next,
259 &pattern->re_regs);
260 if (result < -1)
261 { /* Erk... an error happened during the reop search */
262 Py_DECREF(newlist);
263 PyErr_SetString(ReopError, "match failure");
264 return NULL;
265 }
266 if (next<=result)
267 {
268 int match_start=pattern->re_regs.start[0];
269 int oldmatch_end=match_end;
270 match_end=pattern->re_regs.end[0];
271
272 if (match_start==match_end)
273 { /* A zero-length match; increment to the next position */
274 next=result+1;
275 match_end=oldmatch_end;
276 continue;
277 }
278
279 /* Append the string up to the start of the match */
280 s=PyString_FromStringAndSize(start+oldmatch_end, match_start-oldmatch_end);
281 if (!s)
282 {
283 Py_DECREF(newlist);
284 return NULL;
285 }
286 PyList_Append(newlist, s);
287 Py_DECREF(s);
288
289 if (retain)
290 {
291 /* Append a string containing whatever matched */
292 s=PyString_FromStringAndSize(start+match_start, match_end-match_start);
293 if (!s)
294 {
295 Py_DECREF(newlist);
296 return NULL;
297 }
298 PyList_Append(newlist, s);
299 Py_DECREF(s);
300 }
301 /* Update the pointer, and increment the count of splits */
302 next=match_end; count++;
303 }
304 } while (result!=-1 && !(maxsplit && maxsplit==count) &&
305 next<length);
306 s=PyString_FromStringAndSize(start+match_end, length-match_end);
307 if (!s)
308 {
309 Py_DECREF(newlist);
310 return NULL;
311 }
312 PyList_Append(newlist, s);
313 Py_DECREF(s);
314 Py_DECREF(pattern);
315 return newlist;
316}
317
318static PyObject *
319reop_split(self, args)
320 PyObject *self;
321 PyObject *args;
322{
323 return internal_split(args, 0);
324}
325
326static PyObject *
327reop_splitx(self, args)
328 PyObject *self;
329 PyObject *args;
330{
331 return internal_split(args, 1);
332}
333#endif
334
335static struct PyMethodDef reop_global_methods[] = {
336 {"match", reop_match, 0},
337 {"search", reop_search, 0},
338#if 0
339 {"split", reop_split, 0},
340 {"splitx", reop_splitx, 0},
341#endif
342 {NULL, NULL} /* sentinel */
343};
344
345void
346initreop()
347{
348 PyObject *m, *d, *v;
349 int i;
350 char *s;
351
352 m = Py_InitModule("reop", reop_global_methods);
353 d = PyModule_GetDict(m);
354
355 /* Initialize reop.error exception */
356 v = ReopError = PyString_FromString("reop.error");
357 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
358 goto finally;
359
360 /* Initialize reop.casefold constant */
361 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
362 goto finally;
363
364 if (!(s = PyString_AsString(v)))
365 goto finally;
366
367 for (i = 0; i < 256; i++) {
368 if (isupper(i))
369 s[i] = tolower(i);
370 else
371 s[i] = i;
372 }
373 if (PyDict_SetItemString(d, "casefold", v) < 0)
374 goto finally;
375 Py_DECREF(v);
376
377 if (!PyErr_Occurred())
378 return;
379 finally:
380 Py_FatalError("can't initialize reop module");
381}