blob: c6d559e32ad43e2e7edbf481b1a4479ac680bfde [file] [log] [blame]
/*
XXX support translate table
XXX support range parameter on search
XXX support mstop parameter on search
*/
6
/***********************************************************
Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
30
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35#include "allobjects.h"
36#include "modsupport.h"
37
Guido van Rossum1cab95c1992-01-19 16:31:57 +000038#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000039
40static object *RegexError; /* Exception */
41
42typedef struct {
43 OB_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044 struct re_pattern_buffer re_patbuf; /* The compiled expression */
45 struct re_registers re_regs; /* The registers from the last match */
46 int re_regs_valid; /* Nonzero if the registers are valid */
47 char re_fastmap[256]; /* Storage for fastmap */
48} regexobject;
49
50/* Regex object methods */
51
52static void
53reg_dealloc(re)
54 regexobject *re;
55{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000056 XDEL(re->re_patbuf.buffer);
57 XDEL(re->re_patbuf.translate);
58 DEL(re);
59}
60
61static object *
62makeresult(regs)
63 struct re_registers *regs;
64{
65 object *v = newtupleobject(RE_NREGS);
66 if (v != NULL) {
67 int i;
68 for (i = 0; i < RE_NREGS; i++) {
69 object *w, *u;
70 if ( (w = newtupleobject(2)) == NULL ||
71 (u = newintobject(regs->start[i])) == NULL ||
72 settupleitem(w, 0, u) != 0 ||
73 (u = newintobject(regs->end[i])) == NULL ||
74 settupleitem(w, 1, u) != 0) {
75 XDECREF(w);
76 DECREF(v);
77 return NULL;
78 }
79 settupleitem(v, i, w);
80 }
81 }
82 return v;
83}
84
85static object *
86reg_match(re, args)
87 regexobject *re;
88 object *args;
89{
90 object *v;
91 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000092 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000093 int offset;
94 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000095 if (getargs(args, "s#", &buffer, &size)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000096 offset = 0;
97 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +000098 else {
99 err_clear();
100 if (!getargs(args, "(s#i)", &buffer, &size, &offset))
101 return NULL;
102 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000103 re->re_regs_valid = 0;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000104 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000105 if (result < -1) {
106 /* Failure like stack overflow */
107 err_setstr(RegexError, "match failure");
108 return NULL;
109 }
110 re->re_regs_valid = result >= 0;
111 return newintobject((long)result); /* Length of the match or -1 */
112}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000113
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000114static object *
115reg_search(re, args)
116 regexobject *re;
117 object *args;
118{
119 object *v;
120 char *buffer;
121 int size;
122 int offset;
123 int range;
124 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000125
126 if (getargs(args, "s#", &buffer, &size)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000127 offset = 0;
128 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000129 else {
130 err_clear();
131 if (!getargs(args, "(s#i)", &buffer, &size, &offset))
132 return NULL;
133 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000134 if (offset < 0 || offset > size) {
135 err_setstr(RegexError, "search offset out of range");
136 return NULL;
137 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000138 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
139 the implementation don't match: the documentation states that
140 |range| positions are tried, while the code tries |range|+1
141 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000142 range = size - offset;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000143 re->re_regs_valid = 0;
144 result = re_search(&re->re_patbuf, buffer, size, offset, range,
145 &re->re_regs);
146 if (result < -1) {
147 /* Failure like stack overflow */
148 err_setstr(RegexError, "match failure");
149 return NULL;
150 }
151 re->re_regs_valid = result >= 0;
152 return newintobject((long)result); /* Position of the match or -1 */
153}
154
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000155static struct methodlist reg_methods[] = {
156 {"match", reg_match},
157 {"search", reg_search},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000158 {NULL, NULL} /* sentinel */
159};
160
161static object *
162reg_getattr(re, name)
163 regexobject *re;
164 char *name;
165{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000166 if (strcmp(name, "regs") == 0) {
167 if (!re->re_regs_valid) {
168 err_setstr(RegexError,
169 "regs only valid after successful match/search");
170 return NULL;
171 }
172 return makeresult(&re->re_regs);
173 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000174 return findmethod(reg_methods, (object *)re, name);
175}
176
177static typeobject Regextype = {
178 OB_HEAD_INIT(&Typetype)
179 0, /*ob_size*/
180 "regex", /*tp_name*/
181 sizeof(regexobject), /*tp_size*/
182 0, /*tp_itemsize*/
183 /* methods */
184 reg_dealloc, /*tp_dealloc*/
185 0, /*tp_print*/
186 reg_getattr, /*tp_getattr*/
187 0, /*tp_setattr*/
188 0, /*tp_compare*/
189 0, /*tp_repr*/
190};
191
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000192static object *
193newregexobject(pat, size)
194 char *pat;
195 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000196{
197 regexobject *re;
198 re = NEWOBJ(regexobject, &Regextype);
199 if (re != NULL) {
200 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000201 re->re_patbuf.buffer = NULL;
202 re->re_patbuf.allocated = 0;
203 re->re_patbuf.fastmap = re->re_fastmap;
204 re->re_patbuf.translate = NULL;
205 re->re_regs_valid = 0;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000206 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000207 if (error != NULL) {
208 err_setstr(RegexError, error);
209 DECREF(re);
210 re = NULL;
211 }
212 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000213 return (object *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000214}
215
216static object *
217regex_compile(self, args)
218 object *self;
219 object *args;
220{
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000221 char *pat;
222 int size;
223 if (!getargs(args, "s#", &pat, &size))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000224 return NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000225 return newregexobject(pat, size);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000226}
227
228static object *cache_pat;
229static object *cache_prog;
230
231static int
232update_cache(pat)
233 object *pat;
234{
235 if (pat != cache_pat) {
236 XDECREF(cache_pat);
237 cache_pat = NULL;
238 XDECREF(cache_prog);
239 cache_prog = regex_compile((object *)NULL, pat);
240 if (cache_prog == NULL)
241 return -1;
242 cache_pat = pat;
243 INCREF(cache_pat);
244 }
245 return 0;
246}
247
248static object *
249regex_match(self, args)
250 object *self;
251 object *args;
252{
253 object *pat, *string;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000254 if (!getStrStrarg(args, &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000255 return NULL;
256 if (update_cache(pat) < 0)
257 return NULL;
258 return reg_match((regexobject *)cache_prog, string);
259}
260
261static object *
262regex_search(self, args)
263 object *self;
264 object *args;
265{
266 object *pat, *string;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000267 if (!getStrStrarg(args, &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000268 return NULL;
269 if (update_cache(pat) < 0)
270 return NULL;
271 return reg_search((regexobject *)cache_prog, string);
272}
273
274static object *
275regex_set_syntax(self, args)
276 object *self, *args;
277{
278 int syntax;
279 if (!getintarg(args, &syntax))
280 return NULL;
281 syntax = re_set_syntax(syntax);
282 return newintobject((long)syntax);
283}
284
285static struct methodlist regex_global_methods[] = {
286 {"compile", regex_compile},
287 {"match", regex_match},
288 {"search", regex_search},
289 {"set_syntax", regex_set_syntax},
290 {NULL, NULL} /* sentinel */
291};
292
293initregex()
294{
295 object *m, *d;
296
297 m = initmodule("regex", regex_global_methods);
298 d = getmoduledict(m);
299
300 /* Initialize regex.error exception */
301 RegexError = newstringobject("regex.error");
302 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
303 fatal("can't define regex.error");
304}