blob: fade99fa030f2ccbfba44dccbe4444e431532649 [file] [log] [blame]
/*
XXX support translate table
XXX support range parameter on search
XXX support mstop parameter on search
*/
6
7/***********************************************************
8Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
9Netherlands.
10
11 All Rights Reserved
12
13Permission to use, copy, modify, and distribute this software and its
14documentation for any purpose and without fee is hereby granted,
15provided that the above copyright notice appear in all copies and that
16both that copyright notice and this permission notice appear in
17supporting documentation, and that the names of Stichting Mathematisch
18Centrum or CWI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior permission.
20
21STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
22THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
23FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
24FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
25WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
26ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
27OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
28
29******************************************************************/
30
/* Regular expression objects */
/* This uses GNU regex.c, from subdirectory regex !!! */
33
34#include "allobjects.h"
35#include "modsupport.h"
36
37#include "regex.h"
38
39static object *RegexError; /* Exception */
40
41typedef struct {
42 OB_HEAD
43 object *re_string; /* The string (for printing) */
44 struct re_pattern_buffer re_patbuf; /* The compiled expression */
45 struct re_registers re_regs; /* The registers from the last match */
46 int re_regs_valid; /* Nonzero if the registers are valid */
47 char re_fastmap[256]; /* Storage for fastmap */
48} regexobject;
49
50/* Regex object methods */
51
52static void
53reg_dealloc(re)
54 regexobject *re;
55{
56 XDECREF(re->re_string);
57 XDEL(re->re_patbuf.buffer);
58 XDEL(re->re_patbuf.translate);
59 DEL(re);
60}
61
62static object *
63makeresult(regs)
64 struct re_registers *regs;
65{
66 object *v = newtupleobject(RE_NREGS);
67 if (v != NULL) {
68 int i;
69 for (i = 0; i < RE_NREGS; i++) {
70 object *w, *u;
71 if ( (w = newtupleobject(2)) == NULL ||
72 (u = newintobject(regs->start[i])) == NULL ||
73 settupleitem(w, 0, u) != 0 ||
74 (u = newintobject(regs->end[i])) == NULL ||
75 settupleitem(w, 1, u) != 0) {
76 XDECREF(w);
77 DECREF(v);
78 return NULL;
79 }
80 settupleitem(v, i, w);
81 }
82 }
83 return v;
84}
85
86static object *
87reg_match(re, args)
88 regexobject *re;
89 object *args;
90{
91 object *v;
92 char *buffer;
93 int offset;
94 int result;
95 if (args != NULL && is_stringobject(args)) {
96 v = args;
97 offset = 0;
98 }
99 else if (!getstrintarg(args, &v, &offset))
100 return NULL;
101 buffer = getstringvalue(v);
102 re->re_regs_valid = 0;
103 result = re_match(&re->re_patbuf, buffer, getstringsize(v),
104 offset, &re->re_regs);
105 if (result < -1) {
106 /* Failure like stack overflow */
107 err_setstr(RegexError, "match failure");
108 return NULL;
109 }
110 re->re_regs_valid = result >= 0;
111 return newintobject((long)result); /* Length of the match or -1 */
112}
113static object *
114reg_search(re, args)
115 regexobject *re;
116 object *args;
117{
118 object *v;
119 char *buffer;
120 int size;
121 int offset;
122 int range;
123 int result;
124 if (args != NULL && is_stringobject(args)) {
125 v = args;
126 offset = 0;
127 }
128 else if (!getstrintarg(args, &v, &offset))
129 return NULL;
130 buffer = getstringvalue(v);
131 size = getstringsize(v);
132 if (offset < 0 || offset > size) {
133 err_setstr(RegexError, "search offset out of range");
134 return NULL;
135 }
136 range = size - offset + 1;
137 re->re_regs_valid = 0;
138 result = re_search(&re->re_patbuf, buffer, size, offset, range,
139 &re->re_regs);
140 if (result < -1) {
141 /* Failure like stack overflow */
142 err_setstr(RegexError, "match failure");
143 return NULL;
144 }
145 re->re_regs_valid = result >= 0;
146 return newintobject((long)result); /* Position of the match or -1 */
147}
148
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000149static struct methodlist reg_methods[] = {
150 {"match", reg_match},
151 {"search", reg_search},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000152 {NULL, NULL} /* sentinel */
153};
154
155static object *
156reg_getattr(re, name)
157 regexobject *re;
158 char *name;
159{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000160 if (strcmp(name, "regs") == 0) {
161 if (!re->re_regs_valid) {
162 err_setstr(RegexError,
163 "regs only valid after successful match/search");
164 return NULL;
165 }
166 return makeresult(&re->re_regs);
167 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000168 return findmethod(reg_methods, (object *)re, name);
169}
170
171static typeobject Regextype = {
172 OB_HEAD_INIT(&Typetype)
173 0, /*ob_size*/
174 "regex", /*tp_name*/
175 sizeof(regexobject), /*tp_size*/
176 0, /*tp_itemsize*/
177 /* methods */
178 reg_dealloc, /*tp_dealloc*/
179 0, /*tp_print*/
180 reg_getattr, /*tp_getattr*/
181 0, /*tp_setattr*/
182 0, /*tp_compare*/
183 0, /*tp_repr*/
184};
185
186static regexobject *
187newregexobject(string)
188 object *string;
189{
190 regexobject *re;
191 re = NEWOBJ(regexobject, &Regextype);
192 if (re != NULL) {
193 char *error;
194 INCREF(string);
195 re->re_string = string;
196 re->re_patbuf.buffer = NULL;
197 re->re_patbuf.allocated = 0;
198 re->re_patbuf.fastmap = re->re_fastmap;
199 re->re_patbuf.translate = NULL;
200 re->re_regs_valid = 0;
201 error = re_compile_pattern(getstringvalue(string),
202 getstringsize(string),
203 &re->re_patbuf);
204 if (error != NULL) {
205 err_setstr(RegexError, error);
206 DECREF(re);
207 re = NULL;
208 }
209 }
210 return re;
211}
212
213static object *
214regex_compile(self, args)
215 object *self;
216 object *args;
217{
218 object *string;
219 if (!getstrarg(args, &string))
220 return NULL;
221 return (object *)newregexobject(string);
222}
223
224static object *cache_pat;
225static object *cache_prog;
226
227static int
228update_cache(pat)
229 object *pat;
230{
231 if (pat != cache_pat) {
232 XDECREF(cache_pat);
233 cache_pat = NULL;
234 XDECREF(cache_prog);
235 cache_prog = regex_compile((object *)NULL, pat);
236 if (cache_prog == NULL)
237 return -1;
238 cache_pat = pat;
239 INCREF(cache_pat);
240 }
241 return 0;
242}
243
244static object *
245regex_match(self, args)
246 object *self;
247 object *args;
248{
249 object *pat, *string;
250 if (!getstrstrarg(args, &pat, &string))
251 return NULL;
252 if (update_cache(pat) < 0)
253 return NULL;
254 return reg_match((regexobject *)cache_prog, string);
255}
256
257static object *
258regex_search(self, args)
259 object *self;
260 object *args;
261{
262 object *pat, *string;
263 if (!getstrstrarg(args, &pat, &string))
264 return NULL;
265 if (update_cache(pat) < 0)
266 return NULL;
267 return reg_search((regexobject *)cache_prog, string);
268}
269
270static object *
271regex_set_syntax(self, args)
272 object *self, *args;
273{
274 int syntax;
275 if (!getintarg(args, &syntax))
276 return NULL;
277 syntax = re_set_syntax(syntax);
278 return newintobject((long)syntax);
279}
280
281static struct methodlist regex_global_methods[] = {
282 {"compile", regex_compile},
283 {"match", regex_match},
284 {"search", regex_search},
285 {"set_syntax", regex_set_syntax},
286 {NULL, NULL} /* sentinel */
287};
288
289initregex()
290{
291 object *m, *d;
292
293 m = initmodule("regex", regex_global_methods);
294 d = getmoduledict(m);
295
296 /* Initialize regex.error exception */
297 RegexError = newstringobject("regex.error");
298 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
299 fatal("can't define regex.error");
300}