blob: 5e828325b92ad043e948c239c73d9184ce1b926e [file] [log] [blame]
/*
XXX support translate table
XXX support range parameter on search
XXX support mstop parameter on search
*/
6
7/***********************************************************
8Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
9Netherlands.
10
11 All Rights Reserved
12
13Permission to use, copy, modify, and distribute this software and its
14documentation for any purpose and without fee is hereby granted,
15provided that the above copyright notice appear in all copies and that
16both that copyright notice and this permission notice appear in
17supporting documentation, and that the names of Stichting Mathematisch
18Centrum or CWI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior permission.
20
21STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
22THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
23FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
24FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
25WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
26ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
27OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
28
29******************************************************************/
30
31/* Regular expression objects */
32/* This uses GNU regex.c, from subdirectory regex !!! */
33
34#include "allobjects.h"
35#include "modsupport.h"
36
37#include "regex.h"
38
/* Exception object set by this module's error paths; created and
   stored in the module dictionary under "error" by initregex(). */
static object *RegexError; /* Exception */
40
/* A compiled regular expression together with the registers recorded
   by the most recent match/search performed through it. */
typedef struct {
	OB_HEAD
	/* The pattern source string (for printing); newregexobject()
	   takes a reference to it and reg_dealloc() drops it. */
	object *re_string; /* The string (for printing) */
	/* The compiled expression; its buffer and translate members are
	   released in reg_dealloc(). */
	struct re_pattern_buffer re_patbuf; /* The compiled expression */
	/* Filled in by re_match()/re_search(); only meaningful while
	   re_regs_valid is nonzero. */
	struct re_registers re_regs; /* The registers from the last match */
	/* Cleared before every match/search and set again only when the
	   result was >= 0. */
	int re_regs_valid; /* Nonzero if the registers are valid */
	/* re_patbuf.fastmap is pointed at this array by newregexobject(). */
	char re_fastmap[256]; /* Storage for fastmap */
} regexobject;
49
50/* Regex object methods */
51
52static void
53reg_dealloc(re)
54 regexobject *re;
55{
56 XDECREF(re->re_string);
57 XDEL(re->re_patbuf.buffer);
58 XDEL(re->re_patbuf.translate);
59 DEL(re);
60}
61
62static object *
63makeresult(regs)
64 struct re_registers *regs;
65{
66 object *v = newtupleobject(RE_NREGS);
67 if (v != NULL) {
68 int i;
69 for (i = 0; i < RE_NREGS; i++) {
70 object *w, *u;
71 if ( (w = newtupleobject(2)) == NULL ||
72 (u = newintobject(regs->start[i])) == NULL ||
73 settupleitem(w, 0, u) != 0 ||
74 (u = newintobject(regs->end[i])) == NULL ||
75 settupleitem(w, 1, u) != 0) {
76 XDECREF(w);
77 DECREF(v);
78 return NULL;
79 }
80 settupleitem(v, i, w);
81 }
82 }
83 return v;
84}
85
86static object *
87reg_match(re, args)
88 regexobject *re;
89 object *args;
90{
91 object *v;
92 char *buffer;
93 int offset;
94 int result;
95 if (args != NULL && is_stringobject(args)) {
96 v = args;
97 offset = 0;
98 }
99 else if (!getstrintarg(args, &v, &offset))
100 return NULL;
101 buffer = getstringvalue(v);
102 re->re_regs_valid = 0;
103 result = re_match(&re->re_patbuf, buffer, getstringsize(v),
104 offset, &re->re_regs);
105 if (result < -1) {
106 /* Failure like stack overflow */
107 err_setstr(RegexError, "match failure");
108 return NULL;
109 }
110 re->re_regs_valid = result >= 0;
111 return newintobject((long)result); /* Length of the match or -1 */
112}
113static object *
114reg_search(re, args)
115 regexobject *re;
116 object *args;
117{
118 object *v;
119 char *buffer;
120 int size;
121 int offset;
122 int range;
123 int result;
124 if (args != NULL && is_stringobject(args)) {
125 v = args;
126 offset = 0;
127 }
128 else if (!getstrintarg(args, &v, &offset))
129 return NULL;
130 buffer = getstringvalue(v);
131 size = getstringsize(v);
132 if (offset < 0 || offset > size) {
133 err_setstr(RegexError, "search offset out of range");
134 return NULL;
135 }
136 range = size - offset + 1;
137 re->re_regs_valid = 0;
138 result = re_search(&re->re_patbuf, buffer, size, offset, range,
139 &re->re_regs);
140 if (result < -1) {
141 /* Failure like stack overflow */
142 err_setstr(RegexError, "match failure");
143 return NULL;
144 }
145 re->re_regs_valid = result >= 0;
146 return newintobject((long)result); /* Position of the match or -1 */
147}
148
149static object *
150reg_regs(re, args)
151 regexobject *re;
152 object *args;
153{
154 if (!re->re_regs_valid) {
155 err_setstr(RegexError,
156 "regs only valid after successful match/search");
157 return NULL;
158 }
159 return makeresult(&re->re_regs);
160}
161
/* Method table of regex objects; reg_getattr() looks names up here
   through findmethod(). */
static struct methodlist reg_methods[] = {
	{"match", reg_match},
	{"search", reg_search},
	{"regs", reg_regs},
	{NULL, NULL} /* sentinel */
};
168
169static object *
170reg_getattr(re, name)
171 regexobject *re;
172 char *name;
173{
174 return findmethod(reg_methods, (object *)re, name);
175}
176
/* Type object for regex objects.  Only deallocation and attribute
   lookup are provided; the remaining slots are left as 0. */
static typeobject Regextype = {
	OB_HEAD_INIT(&Typetype)
	0, /*ob_size*/
	"regex", /*tp_name*/
	sizeof(regexobject), /*tp_size*/
	0, /*tp_itemsize*/
	/* methods */
	reg_dealloc, /*tp_dealloc*/
	0, /*tp_print*/
	reg_getattr, /*tp_getattr*/
	0, /*tp_setattr*/
	0, /*tp_compare*/
	0, /*tp_repr*/
};
191
192static regexobject *
193newregexobject(string)
194 object *string;
195{
196 regexobject *re;
197 re = NEWOBJ(regexobject, &Regextype);
198 if (re != NULL) {
199 char *error;
200 INCREF(string);
201 re->re_string = string;
202 re->re_patbuf.buffer = NULL;
203 re->re_patbuf.allocated = 0;
204 re->re_patbuf.fastmap = re->re_fastmap;
205 re->re_patbuf.translate = NULL;
206 re->re_regs_valid = 0;
207 error = re_compile_pattern(getstringvalue(string),
208 getstringsize(string),
209 &re->re_patbuf);
210 if (error != NULL) {
211 err_setstr(RegexError, error);
212 DECREF(re);
213 re = NULL;
214 }
215 }
216 return re;
217}
218
219static object *
220regex_compile(self, args)
221 object *self;
222 object *args;
223{
224 object *string;
225 if (!getstrarg(args, &string))
226 return NULL;
227 return (object *)newregexobject(string);
228}
229
230static object *cache_pat;
231static object *cache_prog;
232
233static int
234update_cache(pat)
235 object *pat;
236{
237 if (pat != cache_pat) {
238 XDECREF(cache_pat);
239 cache_pat = NULL;
240 XDECREF(cache_prog);
241 cache_prog = regex_compile((object *)NULL, pat);
242 if (cache_prog == NULL)
243 return -1;
244 cache_pat = pat;
245 INCREF(cache_pat);
246 }
247 return 0;
248}
249
250static object *
251regex_match(self, args)
252 object *self;
253 object *args;
254{
255 object *pat, *string;
256 if (!getstrstrarg(args, &pat, &string))
257 return NULL;
258 if (update_cache(pat) < 0)
259 return NULL;
260 return reg_match((regexobject *)cache_prog, string);
261}
262
263static object *
264regex_search(self, args)
265 object *self;
266 object *args;
267{
268 object *pat, *string;
269 if (!getstrstrarg(args, &pat, &string))
270 return NULL;
271 if (update_cache(pat) < 0)
272 return NULL;
273 return reg_search((regexobject *)cache_prog, string);
274}
275
276static object *
277regex_set_syntax(self, args)
278 object *self, *args;
279{
280 int syntax;
281 if (!getintarg(args, &syntax))
282 return NULL;
283 syntax = re_set_syntax(syntax);
284 return newintobject((long)syntax);
285}
286
/* Functions exported by the module; handed to initmodule() in
   initregex(). */
static struct methodlist regex_global_methods[] = {
	{"compile", regex_compile},
	{"match", regex_match},
	{"search", regex_search},
	{"set_syntax", regex_set_syntax},
	{NULL, NULL} /* sentinel */
};
294
295initregex()
296{
297 object *m, *d;
298
299 m = initmodule("regex", regex_global_methods);
300 d = getmoduledict(m);
301
302 /* Initialize regex.error exception */
303 RegexError = newstringobject("regex.error");
304 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
305 fatal("can't define regex.error");
306}