blob: 13c6cb216fb7238c13eaba111694c17053424ffc [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
6/***********************************************************
Guido van Rossum36d330b1993-02-21 20:12:16 +00007Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
8Amsterdam, The Netherlands.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00009
10 All Rights Reserved
11
12Permission to use, copy, modify, and distribute this software and its
13documentation for any purpose and without fee is hereby granted,
14provided that the above copyright notice appear in all copies and that
15both that copyright notice and this permission notice appear in
16supporting documentation, and that the names of Stichting Mathematisch
17Centrum or CWI not be used in advertising or publicity pertaining to
18distribution of the software without specific, written prior permission.
19
20STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
21THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
22FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
23FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
24WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
25ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
26OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
27
28******************************************************************/
29
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000033
34#include "allobjects.h"
35#include "modsupport.h"
36
Guido van Rossum1cab95c1992-01-19 16:31:57 +000037#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000038
39static object *RegexError; /* Exception */
40
41typedef struct {
42 OB_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000043 struct re_pattern_buffer re_patbuf; /* The compiled expression */
44 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000045 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossum36d330b1993-02-21 20:12:16 +000046 object *re_translate; /* String object for translate table */
47 object *re_lastok; /* String object last matched/searched */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048} regexobject;
49
50/* Regex object methods */
51
52static void
53reg_dealloc(re)
54 regexobject *re;
55{
Guido van Rossum36d330b1993-02-21 20:12:16 +000056 XDECREF(re->re_translate);
57 XDECREF(re->re_lastok);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000058 XDEL(re->re_patbuf.buffer);
59 XDEL(re->re_patbuf.translate);
60 DEL(re);
61}
62
63static object *
64makeresult(regs)
65 struct re_registers *regs;
66{
67 object *v = newtupleobject(RE_NREGS);
68 if (v != NULL) {
69 int i;
70 for (i = 0; i < RE_NREGS; i++) {
Guido van Rossum36d330b1993-02-21 20:12:16 +000071 object *w;
72 w = mkvalue("(ii)", regs->start[i], regs->end[i]);
73 if (w == NULL) {
74 XDECREF(v);
75 v = NULL;
76 break;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000077 }
78 settupleitem(v, i, w);
79 }
80 }
81 return v;
82}
83
84static object *
85reg_match(re, args)
86 regexobject *re;
87 object *args;
88{
Guido van Rossum36d330b1993-02-21 20:12:16 +000089 object *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000090 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000091 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000092 int offset;
93 int result;
Guido van Rossum36d330b1993-02-21 20:12:16 +000094 if (getargs(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000095 offset = 0;
96 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +000097 else {
98 err_clear();
Guido van Rossum36d330b1993-02-21 20:12:16 +000099 if (!getargs(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000100 return NULL;
101 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000102 buffer = getstringvalue(argstring);
103 size = getstringsize(argstring);
104 if (offset < 0 || offset > size) {
105 err_setstr(RegexError, "match offset out of range");
106 return NULL;
107 }
108 XDECREF(re->re_lastok);
109 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000110 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000111 if (result < -1) {
112 /* Failure like stack overflow */
113 err_setstr(RegexError, "match failure");
114 return NULL;
115 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000116 if (result >= 0) {
117 INCREF(argstring);
118 re->re_lastok = argstring;
119 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000120 return newintobject((long)result); /* Length of the match or -1 */
121}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000122
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000123static object *
124reg_search(re, args)
125 regexobject *re;
126 object *args;
127{
Guido van Rossum36d330b1993-02-21 20:12:16 +0000128 object *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000129 char *buffer;
130 int size;
131 int offset;
132 int range;
133 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000134
Guido van Rossum36d330b1993-02-21 20:12:16 +0000135 if (getargs(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000136 offset = 0;
137 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000138 else {
139 err_clear();
Guido van Rossum36d330b1993-02-21 20:12:16 +0000140 if (!getargs(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000141 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000142 }
143 buffer = getstringvalue(argstring);
144 size = getstringsize(argstring);
145 if (offset < 0 || offset > size) {
146 err_setstr(RegexError, "search offset out of range");
147 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000148 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000149 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
150 the implementation don't match: the documentation states that
151 |range| positions are tried, while the code tries |range|+1
152 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000153 range = size - offset;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000154 XDECREF(re->re_lastok);
155 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000156 result = re_search(&re->re_patbuf, buffer, size, offset, range,
157 &re->re_regs);
158 if (result < -1) {
159 /* Failure like stack overflow */
160 err_setstr(RegexError, "match failure");
161 return NULL;
162 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000163 if (result >= 0) {
164 INCREF(argstring);
165 re->re_lastok = argstring;
166 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000167 return newintobject((long)result); /* Position of the match or -1 */
168}
169
Guido van Rossum36d330b1993-02-21 20:12:16 +0000170static object *
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000171reg_group(re, args)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000172 regexobject *re;
173 object *args;
174{
175 int i, a, b;
176 if (args != NULL && is_tupleobject(args)) {
177 int n = gettuplesize(args);
178 object *res = newtupleobject(n);
179 if (res == NULL)
180 return NULL;
181 for (i = 0; i < n; i++) {
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000182 object *v = reg_group(re, gettupleitem(args, i));
Guido van Rossum36d330b1993-02-21 20:12:16 +0000183 if (v == NULL) {
184 DECREF(res);
185 return NULL;
186 }
187 settupleitem(res, i, v);
188 }
189 return res;
190 }
191 if (!getargs(args, "i", &i))
192 return NULL;
193 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000194 err_setstr(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000195 return NULL;
196 }
197 if (re->re_lastok == NULL) {
198 err_setstr(RegexError,
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000199 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000200 return NULL;
201 }
202 a = re->re_regs.start[i];
203 b = re->re_regs.end[i];
204 if (a < 0 || b < 0) {
205 INCREF(None);
206 return None;
207 }
208 return newsizedstringobject(getstringvalue(re->re_lastok)+a, b-a);
209}
210
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000211static struct methodlist reg_methods[] = {
212 {"match", reg_match},
213 {"search", reg_search},
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000214 {"group", reg_group},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000215 {NULL, NULL} /* sentinel */
216};
217
218static object *
219reg_getattr(re, name)
220 regexobject *re;
221 char *name;
222{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000223 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000224 if (re->re_lastok == NULL) {
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000225 INCREF(None);
226 return None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000227 }
228 return makeresult(&re->re_regs);
229 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000230 if (strcmp(name, "last") == 0) {
231 if (re->re_lastok == NULL) {
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000232 INCREF(None);
233 return None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000234 }
235 INCREF(re->re_lastok);
236 return re->re_lastok;
237 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000238 if (strcmp(name, "translate") == 0) {
239 if (re->re_translate == NULL) {
240 INCREF(None);
241 return None;
242 }
243 INCREF(re->re_translate);
244 return re->re_translate;
245 }
246 if (strcmp(name, "__members__") == 0) {
247 object *list = newlistobject(3);
248 if (list) {
249 setlistitem(list, 0, newstringobject("last"));
250 setlistitem(list, 1, newstringobject("regs"));
251 setlistitem(list, 2, newstringobject("translate"));
252 if (err_occurred()) {
253 DECREF(list);
254 list = NULL;
255 }
256 }
257 return list;
258 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000259 return findmethod(reg_methods, (object *)re, name);
260}
261
262static typeobject Regextype = {
263 OB_HEAD_INIT(&Typetype)
264 0, /*ob_size*/
265 "regex", /*tp_name*/
266 sizeof(regexobject), /*tp_size*/
267 0, /*tp_itemsize*/
268 /* methods */
269 reg_dealloc, /*tp_dealloc*/
270 0, /*tp_print*/
271 reg_getattr, /*tp_getattr*/
272 0, /*tp_setattr*/
273 0, /*tp_compare*/
274 0, /*tp_repr*/
275};
276
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000277static object *
Guido van Rossum36d330b1993-02-21 20:12:16 +0000278newregexobject(pat, size, translate)
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000279 char *pat;
280 int size;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000281 object *translate;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000282{
283 regexobject *re;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000284 if (translate != NULL && getstringsize(translate) != 256) {
285 err_setstr(RegexError,
286 "translation table must be 256 bytes");
287 return NULL;
288 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000289 re = NEWOBJ(regexobject, &Regextype);
290 if (re != NULL) {
291 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000292 re->re_patbuf.buffer = NULL;
293 re->re_patbuf.allocated = 0;
294 re->re_patbuf.fastmap = re->re_fastmap;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000295 if (translate)
296 re->re_patbuf.translate = getstringvalue(translate);
297 else
298 re->re_patbuf.translate = NULL;
299 XINCREF(translate);
300 re->re_translate = translate;
301 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000302 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000303 if (error != NULL) {
304 err_setstr(RegexError, error);
305 DECREF(re);
306 re = NULL;
307 }
308 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000309 return (object *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000310}
311
312static object *
313regex_compile(self, args)
314 object *self;
315 object *args;
316{
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000317 char *pat;
318 int size;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000319 object *tran = NULL;
320 if (!getargs(args, "s#", &pat, &size)) {
321 err_clear();
322 if (!getargs(args, "(s#S)", &pat, &size, &tran))
323 return NULL;
324 }
325 return newregexobject(pat, size, tran);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000326}
327
328static object *cache_pat;
329static object *cache_prog;
330
331static int
332update_cache(pat)
333 object *pat;
334{
335 if (pat != cache_pat) {
336 XDECREF(cache_pat);
337 cache_pat = NULL;
338 XDECREF(cache_prog);
339 cache_prog = regex_compile((object *)NULL, pat);
340 if (cache_prog == NULL)
341 return -1;
342 cache_pat = pat;
343 INCREF(cache_pat);
344 }
345 return 0;
346}
347
348static object *
349regex_match(self, args)
350 object *self;
351 object *args;
352{
353 object *pat, *string;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000354 if (!getargs(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000355 return NULL;
356 if (update_cache(pat) < 0)
357 return NULL;
358 return reg_match((regexobject *)cache_prog, string);
359}
360
361static object *
362regex_search(self, args)
363 object *self;
364 object *args;
365{
366 object *pat, *string;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000367 if (!getargs(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000368 return NULL;
369 if (update_cache(pat) < 0)
370 return NULL;
371 return reg_search((regexobject *)cache_prog, string);
372}
373
374static object *
375regex_set_syntax(self, args)
376 object *self, *args;
377{
378 int syntax;
379 if (!getintarg(args, &syntax))
380 return NULL;
381 syntax = re_set_syntax(syntax);
382 return newintobject((long)syntax);
383}
384
385static struct methodlist regex_global_methods[] = {
386 {"compile", regex_compile},
387 {"match", regex_match},
388 {"search", regex_search},
389 {"set_syntax", regex_set_syntax},
390 {NULL, NULL} /* sentinel */
391};
392
393initregex()
394{
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000395 object *m, *d, *v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000396
397 m = initmodule("regex", regex_global_methods);
398 d = getmoduledict(m);
399
400 /* Initialize regex.error exception */
401 RegexError = newstringobject("regex.error");
402 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
403 fatal("can't define regex.error");
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000404
405 /* Initialize regex.casefold constant */
406 v = newsizedstringobject((char *)NULL, 256);
407 if (v != NULL) {
408 int i;
409 char *s = getstringvalue(v);
410 for (i = 0; i < 256; i++) {
411 if (isupper(i))
412 s[i] = tolower(i);
413 else
414 s[i] = i;
415 }
416 dictinsert(d, "casefold", v);
417 DECREF(v);
418 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000419}