blob: 0817626d473f969c44c69d8304d80a80b9f5352a [file] [log] [blame]
Guido van Rossumdb25f321997-07-10 14:31:32 +00001/***********************************************************
2Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3The Netherlands.
4
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
16
17While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
29
30******************************************************************/
31
32/* $Id$ */
33
34/* Regular expression objects */
35/* This uses Tatu Ylonen's copyleft-free reimplementation of
36 GNU regular expressions */
37
38#include "Python.h"
39
40#include <ctype.h>
41
42#include "regexpr.h"
43
44static PyObject *ReopError; /* Exception */
45
Guido van Rossum74fb3031997-07-17 22:41:38 +000046#define IGNORECASE 0x01
47#define MULTILINE 0x02
48#define DOTALL 0x04
49#define VERBOSE 0x08
50
51static char *reop_casefold;
52
Guido van Rossumdb25f321997-07-10 14:31:32 +000053static PyObject *
54makeresult(regs, num_regs)
55 struct re_registers *regs;
56 int num_regs;
57{
58 PyObject *v;
59 int i;
60 static PyObject *filler = NULL;
61
62 if (filler == NULL) {
63 filler = Py_BuildValue("(ii)", -1, -1);
64 if (filler == NULL)
65 return NULL;
66 }
67 v = PyTuple_New(num_regs);
68 if (v == NULL)
69 return NULL;
70
71 for (i = 0; i < num_regs; i++) {
72 int lo = regs->start[i];
73 int hi = regs->end[i];
74 PyObject *w;
75 if (lo == -1 && hi == -1) {
76 w = filler;
77 Py_INCREF(w);
78 }
79 else
80 w = Py_BuildValue("(ii)", lo, hi);
81 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
82 Py_DECREF(v);
83 return NULL;
84 }
85 }
86 return v;
87}
88
89static PyObject *
90reop_match(self, args)
91 PyObject *self;
92 PyObject *args;
93{
94 char *string;
95 int fastmaplen, stringlen;
96 int can_be_null, anchor, i;
Guido van Rossum04a1d741997-07-15 14:38:13 +000097 int flags, pos, result;
Guido van Rossumdb25f321997-07-10 14:31:32 +000098 struct re_pattern_buffer bufp;
99 struct re_registers re_regs;
Guido van Rossum74fb3031997-07-17 22:41:38 +0000100 PyObject *modules = NULL;
101 PyObject *reopmodule = NULL;
102 PyObject *reopdict = NULL;
103 PyObject *casefold = NULL;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000104
105 if (!PyArg_Parse(args, "(s#iiis#is#i)",
106 &(bufp.buffer), &(bufp.allocated),
Guido van Rossum04a1d741997-07-15 14:38:13 +0000107 &(bufp.num_registers), &flags, &can_be_null,
Guido van Rossumdb25f321997-07-10 14:31:32 +0000108 &(bufp.fastmap), &fastmaplen,
109 &anchor,
110 &string, &stringlen,
111 &pos))
112 return NULL;
113
114 /* XXX sanity-check the input data */
115 bufp.used=bufp.allocated;
Guido van Rossum74fb3031997-07-17 22:41:38 +0000116 if (flags & IGNORECASE)
117 {
118 if ((modules = PyImport_GetModuleDict()) == NULL)
119 return NULL;
120
121 if ((reopmodule = PyDict_GetItemString(modules,
122 "reop")) == NULL)
123 return NULL;
124
125 if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
126 return NULL;
127
128 if ((casefold = PyDict_GetItemString(reopdict,
129 "casefold")) == NULL)
130 return NULL;
131
132 bufp.translate = PyString_AsString(casefold);
133 }
134 else
135 bufp.translate=NULL;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000136 bufp.fastmap_accurate=1;
137 bufp.can_be_null=can_be_null;
138 bufp.uses_registers=1;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000139 bufp.anchor=anchor;
140
Guido van Rossum74fb3031997-07-17 22:41:38 +0000141 for(i=0; i<bufp.num_registers; i++) {
142 re_regs.start[i]=-1;
143 re_regs.end[i]=-1;
144 }
Guido van Rossumdb25f321997-07-10 14:31:32 +0000145
146 result = re_match(&bufp,
147 string, stringlen, pos,
148 &re_regs);
Guido van Rossum74fb3031997-07-17 22:41:38 +0000149
Guido van Rossumdb25f321997-07-10 14:31:32 +0000150 if (result < -1) {
151 /* Failure like stack overflow */
152 PyErr_SetString(ReopError, "match failure");
Guido van Rossum74fb3031997-07-17 22:41:38 +0000153
Guido van Rossumdb25f321997-07-10 14:31:32 +0000154 return NULL;
155 }
Guido van Rossum63e18191997-07-11 11:08:38 +0000156 if (result == -1) {
157 Py_INCREF(Py_None);
158 return Py_None;
159 }
Guido van Rossum04a1d741997-07-15 14:38:13 +0000160 return makeresult(&re_regs, bufp.num_registers);
Guido van Rossumdb25f321997-07-10 14:31:32 +0000161}
162
163static PyObject *
164reop_search(self, args)
165 PyObject *self;
166 PyObject *args;
167{
168 char *string;
169 int fastmaplen, stringlen;
170 int can_be_null, anchor, i;
Guido van Rossum04a1d741997-07-15 14:38:13 +0000171 int flags, pos, result;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000172 struct re_pattern_buffer bufp;
173 struct re_registers re_regs;
Guido van Rossum74fb3031997-07-17 22:41:38 +0000174 PyObject *modules = NULL;
175 PyObject *reopmodule = NULL;
176 PyObject *reopdict = NULL;
177 PyObject *casefold = NULL;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000178
179 if (!PyArg_Parse(args, "(s#iiis#is#i)",
180 &(bufp.buffer), &(bufp.allocated),
Guido van Rossum04a1d741997-07-15 14:38:13 +0000181 &(bufp.num_registers), &flags, &can_be_null,
Guido van Rossumdb25f321997-07-10 14:31:32 +0000182 &(bufp.fastmap), &fastmaplen,
183 &anchor,
184 &string, &stringlen,
185 &pos))
186 return NULL;
187
188 /* XXX sanity-check the input data */
189 bufp.used=bufp.allocated;
Guido van Rossum74fb3031997-07-17 22:41:38 +0000190 if (flags & IGNORECASE)
191 {
192 if ((modules = PyImport_GetModuleDict()) == NULL)
193 return NULL;
194
195 if ((reopmodule = PyDict_GetItemString(modules,
196 "reop")) == NULL)
197 return NULL;
198
199 if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
200 return NULL;
201
202 if ((casefold = PyDict_GetItemString(reopdict,
203 "casefold")) == NULL)
204 return NULL;
205
206 bufp.translate = PyString_AsString(casefold);
207 }
208 else
209 bufp.translate=NULL;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000210 bufp.fastmap_accurate=1;
211 bufp.can_be_null=can_be_null;
212 bufp.uses_registers=1;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000213 bufp.anchor=anchor;
214
Guido van Rossum74fb3031997-07-17 22:41:38 +0000215 for(i = 0; i < bufp.num_registers; i++) {
216 re_regs.start[i] = -1;
217 re_regs.end[i] = -1;
218 }
Guido van Rossumdb25f321997-07-10 14:31:32 +0000219
220 result = re_search(&bufp,
221 string, stringlen, pos, stringlen-pos,
222 &re_regs);
Guido van Rossum74fb3031997-07-17 22:41:38 +0000223
Guido van Rossumdb25f321997-07-10 14:31:32 +0000224 if (result < -1) {
225 /* Failure like stack overflow */
226 PyErr_SetString(ReopError, "match failure");
227 return NULL;
228 }
Guido van Rossum74fb3031997-07-17 22:41:38 +0000229
Guido van Rossum63e18191997-07-11 11:08:38 +0000230 if (result == -1) {
231 Py_INCREF(Py_None);
232 return Py_None;
233 }
Guido van Rossum74fb3031997-07-17 22:41:38 +0000234
Guido van Rossum04a1d741997-07-15 14:38:13 +0000235 return makeresult(&re_regs, bufp.num_registers);
Guido van Rossumdb25f321997-07-10 14:31:32 +0000236}
237
238#if 0
239/* Functions originally in the regsub module.
240 Added June 1, 1997.
241 */
242
/* A cache of previously used patterns is maintained, keyed on the
   pattern string alone.  Note that if you change the reop syntax flag,
   entries already in the cache become stale: they are not flushed
   automatically.
   XXX Solution: use (syntax flag, pattern) as keys?  Clear the cache
   every so often, or once it gets past a certain size?
*/
249
250static PyObject *cache_dict=NULL;
251
252/* Accept an object; if it's a reop pattern, Py_INCREF it and return
253 it. If it's a string, a reop object is compiled and cached.
254*/
255
256static reopobject *
257cached_compile(pattern)
258 PyObject *pattern;
259{
260 reopobject *p2;
261
262 if (!PyString_Check(pattern))
263 {
264 /* It's not a string, so assume it's a compiled reop object */
265 /* XXX check that! */
266 Py_INCREF(pattern);
267 return (reopobject*)pattern;
268 }
269 if (cache_dict==NULL)
270 {
271 cache_dict=PyDict_New();
272 if (cache_dict==NULL)
273 {
274 return (reopobject*)NULL;
275 }
276 }
277
278 /* See if the pattern has already been cached; if so, return that
279 reop object */
280 p2=(reopobject*)PyDict_GetItem(cache_dict, pattern);
281 if (p2)
282 {
283 Py_INCREF(p2);
284 return (reopobject*)p2;
285 }
286
287 /* Compile the pattern and cache it */
288 p2=(reopobject*)newreopobject(pattern, NULL, pattern, NULL);
289 if (!p2) return p2;
290 PyDict_SetItem(cache_dict, pattern, (PyObject*)p2);
291 return p2;
292}
293
294
295static PyObject *
296internal_split(args, retain)
297 PyObject *args;
298 int retain;
299{
300 PyObject *newlist, *s;
301 reopobject *pattern;
302 int maxsplit=0, count=0, length, next=0, result;
303 int match_end=0; /* match_start is defined below */
304 char *start;
305
306 if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
307 &maxsplit))
308 {
309 PyErr_Clear();
310 if (!PyArg_ParseTuple(args, "s#O", &start, &length, &pattern))
311 return NULL;
312 }
313 pattern=cached_compile((PyObject *)pattern);
314 if (!pattern) return NULL;
315
316 newlist=PyList_New(0);
317 if (!newlist) return NULL;
318
319 do
320 {
321 result = re_search(&pattern->re_patbuf,
322 start, length, next, length-next,
323 &pattern->re_regs);
324 if (result < -1)
325 { /* Erk... an error happened during the reop search */
326 Py_DECREF(newlist);
327 PyErr_SetString(ReopError, "match failure");
328 return NULL;
329 }
330 if (next<=result)
331 {
332 int match_start=pattern->re_regs.start[0];
333 int oldmatch_end=match_end;
334 match_end=pattern->re_regs.end[0];
335
336 if (match_start==match_end)
337 { /* A zero-length match; increment to the next position */
338 next=result+1;
339 match_end=oldmatch_end;
340 continue;
341 }
342
343 /* Append the string up to the start of the match */
344 s=PyString_FromStringAndSize(start+oldmatch_end, match_start-oldmatch_end);
345 if (!s)
346 {
347 Py_DECREF(newlist);
348 return NULL;
349 }
350 PyList_Append(newlist, s);
351 Py_DECREF(s);
352
353 if (retain)
354 {
355 /* Append a string containing whatever matched */
356 s=PyString_FromStringAndSize(start+match_start, match_end-match_start);
357 if (!s)
358 {
359 Py_DECREF(newlist);
360 return NULL;
361 }
362 PyList_Append(newlist, s);
363 Py_DECREF(s);
364 }
365 /* Update the pointer, and increment the count of splits */
366 next=match_end; count++;
367 }
368 } while (result!=-1 && !(maxsplit && maxsplit==count) &&
369 next<length);
370 s=PyString_FromStringAndSize(start+match_end, length-match_end);
371 if (!s)
372 {
373 Py_DECREF(newlist);
374 return NULL;
375 }
376 PyList_Append(newlist, s);
377 Py_DECREF(s);
378 Py_DECREF(pattern);
379 return newlist;
380}
381
382static PyObject *
383reop_split(self, args)
384 PyObject *self;
385 PyObject *args;
386{
387 return internal_split(args, 0);
388}
389
390static PyObject *
391reop_splitx(self, args)
392 PyObject *self;
393 PyObject *args;
394{
395 return internal_split(args, 1);
396}
397#endif
398
399static struct PyMethodDef reop_global_methods[] = {
400 {"match", reop_match, 0},
401 {"search", reop_search, 0},
402#if 0
403 {"split", reop_split, 0},
404 {"splitx", reop_splitx, 0},
405#endif
406 {NULL, NULL} /* sentinel */
407};
408
409void
410initreop()
411{
Guido van Rossum74fb3031997-07-17 22:41:38 +0000412 PyObject *m, *d, *k, *v, *o;
Guido van Rossumdb25f321997-07-10 14:31:32 +0000413 int i;
414 char *s;
Guido van Rossum74fb3031997-07-17 22:41:38 +0000415 char j[2];
416
417 re_compile_initialize();
418
Guido van Rossumdb25f321997-07-10 14:31:32 +0000419 m = Py_InitModule("reop", reop_global_methods);
420 d = PyModule_GetDict(m);
421
422 /* Initialize reop.error exception */
423 v = ReopError = PyString_FromString("reop.error");
424 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
425 goto finally;
426
427 /* Initialize reop.casefold constant */
428 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
429 goto finally;
430
431 if (!(s = PyString_AsString(v)))
432 goto finally;
433
434 for (i = 0; i < 256; i++) {
435 if (isupper(i))
436 s[i] = tolower(i);
437 else
438 s[i] = i;
439 }
Guido van Rossum74fb3031997-07-17 22:41:38 +0000440
Guido van Rossumdb25f321997-07-10 14:31:32 +0000441 if (PyDict_SetItemString(d, "casefold", v) < 0)
442 goto finally;
443 Py_DECREF(v);
444
Guido van Rossum74fb3031997-07-17 22:41:38 +0000445 /* Initialize the syntax table */
446
447 o = PyDict_New();
448 if (o == NULL)
449 goto finally;
450
451 j[1] = '\0';
452 for (i = 0; i < 256; i++)
453 {
454 j[0] = i;
455 k = PyString_FromStringAndSize(j, 1);
456 if (k == NULL)
457 goto finally;
458 v = PyInt_FromLong(re_syntax_table[i]);
459 if (v == NULL)
460 goto finally;
461 if (PyDict_SetItem(o, k, v) < 0)
462 goto finally;
463 Py_DECREF(k);
464 Py_DECREF(v);
465 }
466
467 if (PyDict_SetItemString(d, "syntax_table", o) < 0)
468 goto finally;
469 Py_DECREF(o);
470
471 v = PyInt_FromLong(Sword);
472 if (v == NULL)
473 goto finally;
474
475 if (PyDict_SetItemString(d, "word", v) < 0)
476 goto finally;
477 Py_DECREF(v);
478
479 v = PyInt_FromLong(Swhitespace);
480 if (v == NULL)
481 goto finally;
482
483 if (PyDict_SetItemString(d, "whitespace", v) < 0)
484 goto finally;
485 Py_DECREF(v);
486
487 v = PyInt_FromLong(Sdigit);
488 if (v == NULL)
489 goto finally;
490
491 if (PyDict_SetItemString(d, "digit", v) < 0)
492 goto finally;
493 Py_DECREF(v);
494
Guido van Rossumdb25f321997-07-10 14:31:32 +0000495 if (!PyErr_Occurred())
496 return;
Guido van Rossum74fb3031997-07-17 22:41:38 +0000497
Guido van Rossumdb25f321997-07-10 14:31:32 +0000498 finally:
499 Py_FatalError("can't initialize reop module");
500}