blob: 1fb35735d3311bc71af0e4935472bff26cae01fd [file] [log] [blame]
/*
XXX support translate table
XXX support range parameter on search
XXX support mstop parameter on search
*/
6
7/***********************************************************
8Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
9Netherlands.
10
11 All Rights Reserved
12
13Permission to use, copy, modify, and distribute this software and its
14documentation for any purpose and without fee is hereby granted,
15provided that the above copyright notice appear in all copies and that
16both that copyright notice and this permission notice appear in
17supporting documentation, and that the names of Stichting Mathematisch
18Centrum or CWI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior permission.
20
21STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
22THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
23FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
24FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
25WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
26ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
27OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
28
29******************************************************************/
30
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35#include "allobjects.h"
36#include "modsupport.h"
37
Guido van Rossum1cab95c1992-01-19 16:31:57 +000038#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000039
40static object *RegexError; /* Exception */
41
42typedef struct {
43 OB_HEAD
44 object *re_string; /* The string (for printing) */
45 struct re_pattern_buffer re_patbuf; /* The compiled expression */
46 struct re_registers re_regs; /* The registers from the last match */
47 int re_regs_valid; /* Nonzero if the registers are valid */
48 char re_fastmap[256]; /* Storage for fastmap */
49} regexobject;
50
51/* Regex object methods */
52
53static void
54reg_dealloc(re)
55 regexobject *re;
56{
57 XDECREF(re->re_string);
58 XDEL(re->re_patbuf.buffer);
59 XDEL(re->re_patbuf.translate);
60 DEL(re);
61}
62
63static object *
64makeresult(regs)
65 struct re_registers *regs;
66{
67 object *v = newtupleobject(RE_NREGS);
68 if (v != NULL) {
69 int i;
70 for (i = 0; i < RE_NREGS; i++) {
71 object *w, *u;
72 if ( (w = newtupleobject(2)) == NULL ||
73 (u = newintobject(regs->start[i])) == NULL ||
74 settupleitem(w, 0, u) != 0 ||
75 (u = newintobject(regs->end[i])) == NULL ||
76 settupleitem(w, 1, u) != 0) {
77 XDECREF(w);
78 DECREF(v);
79 return NULL;
80 }
81 settupleitem(v, i, w);
82 }
83 }
84 return v;
85}
86
87static object *
88reg_match(re, args)
89 regexobject *re;
90 object *args;
91{
92 object *v;
93 char *buffer;
94 int offset;
95 int result;
96 if (args != NULL && is_stringobject(args)) {
97 v = args;
98 offset = 0;
99 }
100 else if (!getstrintarg(args, &v, &offset))
101 return NULL;
102 buffer = getstringvalue(v);
103 re->re_regs_valid = 0;
104 result = re_match(&re->re_patbuf, buffer, getstringsize(v),
105 offset, &re->re_regs);
106 if (result < -1) {
107 /* Failure like stack overflow */
108 err_setstr(RegexError, "match failure");
109 return NULL;
110 }
111 re->re_regs_valid = result >= 0;
112 return newintobject((long)result); /* Length of the match or -1 */
113}
114static object *
115reg_search(re, args)
116 regexobject *re;
117 object *args;
118{
119 object *v;
120 char *buffer;
121 int size;
122 int offset;
123 int range;
124 int result;
125 if (args != NULL && is_stringobject(args)) {
126 v = args;
127 offset = 0;
128 }
129 else if (!getstrintarg(args, &v, &offset))
130 return NULL;
131 buffer = getstringvalue(v);
132 size = getstringsize(v);
133 if (offset < 0 || offset > size) {
134 err_setstr(RegexError, "search offset out of range");
135 return NULL;
136 }
Guido van Rossum2d785901992-01-26 18:12:41 +0000137 range = size - offset;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000138 re->re_regs_valid = 0;
139 result = re_search(&re->re_patbuf, buffer, size, offset, range,
140 &re->re_regs);
141 if (result < -1) {
142 /* Failure like stack overflow */
143 err_setstr(RegexError, "match failure");
144 return NULL;
145 }
146 re->re_regs_valid = result >= 0;
147 return newintobject((long)result); /* Position of the match or -1 */
148}
149
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000150static struct methodlist reg_methods[] = {
151 {"match", reg_match},
152 {"search", reg_search},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000153 {NULL, NULL} /* sentinel */
154};
155
156static object *
157reg_getattr(re, name)
158 regexobject *re;
159 char *name;
160{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000161 if (strcmp(name, "regs") == 0) {
162 if (!re->re_regs_valid) {
163 err_setstr(RegexError,
164 "regs only valid after successful match/search");
165 return NULL;
166 }
167 return makeresult(&re->re_regs);
168 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000169 return findmethod(reg_methods, (object *)re, name);
170}
171
172static typeobject Regextype = {
173 OB_HEAD_INIT(&Typetype)
174 0, /*ob_size*/
175 "regex", /*tp_name*/
176 sizeof(regexobject), /*tp_size*/
177 0, /*tp_itemsize*/
178 /* methods */
179 reg_dealloc, /*tp_dealloc*/
180 0, /*tp_print*/
181 reg_getattr, /*tp_getattr*/
182 0, /*tp_setattr*/
183 0, /*tp_compare*/
184 0, /*tp_repr*/
185};
186
187static regexobject *
188newregexobject(string)
189 object *string;
190{
191 regexobject *re;
192 re = NEWOBJ(regexobject, &Regextype);
193 if (re != NULL) {
194 char *error;
195 INCREF(string);
196 re->re_string = string;
197 re->re_patbuf.buffer = NULL;
198 re->re_patbuf.allocated = 0;
199 re->re_patbuf.fastmap = re->re_fastmap;
200 re->re_patbuf.translate = NULL;
201 re->re_regs_valid = 0;
202 error = re_compile_pattern(getstringvalue(string),
203 getstringsize(string),
204 &re->re_patbuf);
205 if (error != NULL) {
206 err_setstr(RegexError, error);
207 DECREF(re);
208 re = NULL;
209 }
210 }
211 return re;
212}
213
214static object *
215regex_compile(self, args)
216 object *self;
217 object *args;
218{
219 object *string;
220 if (!getstrarg(args, &string))
221 return NULL;
222 return (object *)newregexobject(string);
223}
224
225static object *cache_pat;
226static object *cache_prog;
227
228static int
229update_cache(pat)
230 object *pat;
231{
232 if (pat != cache_pat) {
233 XDECREF(cache_pat);
234 cache_pat = NULL;
235 XDECREF(cache_prog);
236 cache_prog = regex_compile((object *)NULL, pat);
237 if (cache_prog == NULL)
238 return -1;
239 cache_pat = pat;
240 INCREF(cache_pat);
241 }
242 return 0;
243}
244
245static object *
246regex_match(self, args)
247 object *self;
248 object *args;
249{
250 object *pat, *string;
251 if (!getstrstrarg(args, &pat, &string))
252 return NULL;
253 if (update_cache(pat) < 0)
254 return NULL;
255 return reg_match((regexobject *)cache_prog, string);
256}
257
258static object *
259regex_search(self, args)
260 object *self;
261 object *args;
262{
263 object *pat, *string;
264 if (!getstrstrarg(args, &pat, &string))
265 return NULL;
266 if (update_cache(pat) < 0)
267 return NULL;
268 return reg_search((regexobject *)cache_prog, string);
269}
270
271static object *
272regex_set_syntax(self, args)
273 object *self, *args;
274{
275 int syntax;
276 if (!getintarg(args, &syntax))
277 return NULL;
278 syntax = re_set_syntax(syntax);
279 return newintobject((long)syntax);
280}
281
282static struct methodlist regex_global_methods[] = {
283 {"compile", regex_compile},
284 {"match", regex_match},
285 {"search", regex_search},
286 {"set_syntax", regex_set_syntax},
287 {NULL, NULL} /* sentinel */
288};
289
290initregex()
291{
292 object *m, *d;
293
294 m = initmodule("regex", regex_global_methods);
295 d = getmoduledict(m);
296
297 /* Initialize regex.error exception */
298 RegexError = newstringobject("regex.error");
299 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
300 fatal("can't define regex.error");
301}