blob: 2b23a73824e388951584783c813da8e36d2e7086 [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
/***********************************************************
Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum,
Amsterdam, The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
29
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000033
34#include "allobjects.h"
35#include "modsupport.h"
36
Guido van Rossum1cab95c1992-01-19 16:31:57 +000037#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000038
39static object *RegexError; /* Exception */
40
41typedef struct {
42 OB_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000043 struct re_pattern_buffer re_patbuf; /* The compiled expression */
44 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000045 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossum36d330b1993-02-21 20:12:16 +000046 object *re_translate; /* String object for translate table */
47 object *re_lastok; /* String object last matched/searched */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048} regexobject;
49
50/* Regex object methods */
51
52static void
53reg_dealloc(re)
54 regexobject *re;
55{
Guido van Rossum36d330b1993-02-21 20:12:16 +000056 XDECREF(re->re_translate);
57 XDECREF(re->re_lastok);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000058 XDEL(re->re_patbuf.buffer);
59 XDEL(re->re_patbuf.translate);
60 DEL(re);
61}
62
63static object *
64makeresult(regs)
65 struct re_registers *regs;
66{
67 object *v = newtupleobject(RE_NREGS);
68 if (v != NULL) {
69 int i;
70 for (i = 0; i < RE_NREGS; i++) {
Guido van Rossum36d330b1993-02-21 20:12:16 +000071 object *w;
72 w = mkvalue("(ii)", regs->start[i], regs->end[i]);
73 if (w == NULL) {
74 XDECREF(v);
75 v = NULL;
76 break;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000077 }
78 settupleitem(v, i, w);
79 }
80 }
81 return v;
82}
83
84static object *
85reg_match(re, args)
86 regexobject *re;
87 object *args;
88{
Guido van Rossum36d330b1993-02-21 20:12:16 +000089 object *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000090 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000091 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000092 int offset;
93 int result;
Guido van Rossum36d330b1993-02-21 20:12:16 +000094 if (getargs(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000095 offset = 0;
96 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +000097 else {
98 err_clear();
Guido van Rossum36d330b1993-02-21 20:12:16 +000099 if (!getargs(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000100 return NULL;
101 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000102 buffer = getstringvalue(argstring);
103 size = getstringsize(argstring);
104 if (offset < 0 || offset > size) {
105 err_setstr(RegexError, "match offset out of range");
106 return NULL;
107 }
108 XDECREF(re->re_lastok);
109 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000110 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000111 if (result < -1) {
112 /* Failure like stack overflow */
113 err_setstr(RegexError, "match failure");
114 return NULL;
115 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000116 if (result >= 0) {
117 INCREF(argstring);
118 re->re_lastok = argstring;
119 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000120 return newintobject((long)result); /* Length of the match or -1 */
121}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000122
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000123static object *
124reg_search(re, args)
125 regexobject *re;
126 object *args;
127{
Guido van Rossum36d330b1993-02-21 20:12:16 +0000128 object *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000129 char *buffer;
130 int size;
131 int offset;
132 int range;
133 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000134
Guido van Rossum36d330b1993-02-21 20:12:16 +0000135 if (getargs(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000136 offset = 0;
137 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000138 else {
139 err_clear();
Guido van Rossum36d330b1993-02-21 20:12:16 +0000140 if (!getargs(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000141 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000142 }
143 buffer = getstringvalue(argstring);
144 size = getstringsize(argstring);
145 if (offset < 0 || offset > size) {
146 err_setstr(RegexError, "search offset out of range");
147 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000148 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000149 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
150 the implementation don't match: the documentation states that
151 |range| positions are tried, while the code tries |range|+1
152 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000153 range = size - offset;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000154 XDECREF(re->re_lastok);
155 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000156 result = re_search(&re->re_patbuf, buffer, size, offset, range,
157 &re->re_regs);
158 if (result < -1) {
159 /* Failure like stack overflow */
160 err_setstr(RegexError, "match failure");
161 return NULL;
162 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000163 if (result >= 0) {
164 INCREF(argstring);
165 re->re_lastok = argstring;
166 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000167 return newintobject((long)result); /* Position of the match or -1 */
168}
169
Guido van Rossum36d330b1993-02-21 20:12:16 +0000170static object *
171reg_substring(re, args)
172 regexobject *re;
173 object *args;
174{
175 int i, a, b;
176 if (args != NULL && is_tupleobject(args)) {
177 int n = gettuplesize(args);
178 object *res = newtupleobject(n);
179 if (res == NULL)
180 return NULL;
181 for (i = 0; i < n; i++) {
182 object *v = reg_substring(re, gettupleitem(args, i));
183 if (v == NULL) {
184 DECREF(res);
185 return NULL;
186 }
187 settupleitem(res, i, v);
188 }
189 return res;
190 }
191 if (!getargs(args, "i", &i))
192 return NULL;
193 if (i < 0 || i >= RE_NREGS) {
194 err_setstr(RegexError, "substring() index out of range");
195 return NULL;
196 }
197 if (re->re_lastok == NULL) {
198 err_setstr(RegexError,
199 "substring() only valid after successful match/search");
200 return NULL;
201 }
202 a = re->re_regs.start[i];
203 b = re->re_regs.end[i];
204 if (a < 0 || b < 0) {
205 INCREF(None);
206 return None;
207 }
208 return newsizedstringobject(getstringvalue(re->re_lastok)+a, b-a);
209}
210
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000211static struct methodlist reg_methods[] = {
212 {"match", reg_match},
213 {"search", reg_search},
Guido van Rossum36d330b1993-02-21 20:12:16 +0000214 {"substring", reg_substring},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000215 {NULL, NULL} /* sentinel */
216};
217
218static object *
219reg_getattr(re, name)
220 regexobject *re;
221 char *name;
222{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000223 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000224 if (re->re_lastok == NULL) {
Guido van Rossumb824fc61992-01-01 14:52:16 +0000225 err_setstr(RegexError,
226 "regs only valid after successful match/search");
227 return NULL;
228 }
229 return makeresult(&re->re_regs);
230 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000231 if (strcmp(name, "last") == 0) {
232 if (re->re_lastok == NULL) {
233 err_setstr(RegexError,
234 "last only valid after successful match/search");
235 return NULL;
236 }
237 INCREF(re->re_lastok);
238 return re->re_lastok;
239 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000240 return findmethod(reg_methods, (object *)re, name);
241}
242
243static typeobject Regextype = {
244 OB_HEAD_INIT(&Typetype)
245 0, /*ob_size*/
246 "regex", /*tp_name*/
247 sizeof(regexobject), /*tp_size*/
248 0, /*tp_itemsize*/
249 /* methods */
250 reg_dealloc, /*tp_dealloc*/
251 0, /*tp_print*/
252 reg_getattr, /*tp_getattr*/
253 0, /*tp_setattr*/
254 0, /*tp_compare*/
255 0, /*tp_repr*/
256};
257
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000258static object *
Guido van Rossum36d330b1993-02-21 20:12:16 +0000259newregexobject(pat, size, translate)
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000260 char *pat;
261 int size;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000262 object *translate;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000263{
264 regexobject *re;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000265 if (translate != NULL && getstringsize(translate) != 256) {
266 err_setstr(RegexError,
267 "translation table must be 256 bytes");
268 return NULL;
269 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000270 re = NEWOBJ(regexobject, &Regextype);
271 if (re != NULL) {
272 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000273 re->re_patbuf.buffer = NULL;
274 re->re_patbuf.allocated = 0;
275 re->re_patbuf.fastmap = re->re_fastmap;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000276 if (translate)
277 re->re_patbuf.translate = getstringvalue(translate);
278 else
279 re->re_patbuf.translate = NULL;
280 XINCREF(translate);
281 re->re_translate = translate;
282 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000283 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000284 if (error != NULL) {
285 err_setstr(RegexError, error);
286 DECREF(re);
287 re = NULL;
288 }
289 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000290 return (object *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000291}
292
293static object *
294regex_compile(self, args)
295 object *self;
296 object *args;
297{
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000298 char *pat;
299 int size;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000300 object *tran = NULL;
301 if (!getargs(args, "s#", &pat, &size)) {
302 err_clear();
303 if (!getargs(args, "(s#S)", &pat, &size, &tran))
304 return NULL;
305 }
306 return newregexobject(pat, size, tran);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000307}
308
309static object *cache_pat;
310static object *cache_prog;
311
312static int
313update_cache(pat)
314 object *pat;
315{
316 if (pat != cache_pat) {
317 XDECREF(cache_pat);
318 cache_pat = NULL;
319 XDECREF(cache_prog);
320 cache_prog = regex_compile((object *)NULL, pat);
321 if (cache_prog == NULL)
322 return -1;
323 cache_pat = pat;
324 INCREF(cache_pat);
325 }
326 return 0;
327}
328
329static object *
330regex_match(self, args)
331 object *self;
332 object *args;
333{
334 object *pat, *string;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000335 if (!getargs(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000336 return NULL;
337 if (update_cache(pat) < 0)
338 return NULL;
339 return reg_match((regexobject *)cache_prog, string);
340}
341
342static object *
343regex_search(self, args)
344 object *self;
345 object *args;
346{
347 object *pat, *string;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000348 if (!getargs(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000349 return NULL;
350 if (update_cache(pat) < 0)
351 return NULL;
352 return reg_search((regexobject *)cache_prog, string);
353}
354
355static object *
356regex_set_syntax(self, args)
357 object *self, *args;
358{
359 int syntax;
360 if (!getintarg(args, &syntax))
361 return NULL;
362 syntax = re_set_syntax(syntax);
363 return newintobject((long)syntax);
364}
365
366static struct methodlist regex_global_methods[] = {
367 {"compile", regex_compile},
368 {"match", regex_match},
369 {"search", regex_search},
370 {"set_syntax", regex_set_syntax},
371 {NULL, NULL} /* sentinel */
372};
373
374initregex()
375{
376 object *m, *d;
377
378 m = initmodule("regex", regex_global_methods);
379 d = getmoduledict(m);
380
381 /* Initialize regex.error exception */
382 RegexError = newstringobject("regex.error");
383 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
384 fatal("can't define regex.error");
385}