blob: 77daa77b6cb16494ae1b3de1e10ea58f523a9ff8 [file] [log] [blame]
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00001/*
2XXX support translate table
3XXX support range parameter on search
4XXX support mstop parameter on search
5*/
6
7/***********************************************************
8Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
9Netherlands.
10
11 All Rights Reserved
12
13Permission to use, copy, modify, and distribute this software and its
14documentation for any purpose and without fee is hereby granted,
15provided that the above copyright notice appear in all copies and that
16both that copyright notice and this permission notice appear in
17supporting documentation, and that the names of Stichting Mathematisch
18Centrum or CWI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior permission.
20
21STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
22THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
23FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
24FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
25WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
26ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
27OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
28
29******************************************************************/
30
31/* Regular expression objects */
Guido van Rossum1cab95c1992-01-19 16:31:57 +000032/* This uses Tatu Ylonen's copyleft-free reimplementation of
33 GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35#include "allobjects.h"
36#include "modsupport.h"
37
Guido van Rossum1cab95c1992-01-19 16:31:57 +000038#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000039
40static object *RegexError; /* Exception */
41
42typedef struct {
43 OB_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044 struct re_pattern_buffer re_patbuf; /* The compiled expression */
45 struct re_registers re_regs; /* The registers from the last match */
46 int re_regs_valid; /* Nonzero if the registers are valid */
47 char re_fastmap[256]; /* Storage for fastmap */
48} regexobject;
49
50/* Regex object methods */
51
52static void
53reg_dealloc(re)
54 regexobject *re;
55{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000056 XDEL(re->re_patbuf.buffer);
57 XDEL(re->re_patbuf.translate);
58 DEL(re);
59}
60
61static object *
62makeresult(regs)
63 struct re_registers *regs;
64{
65 object *v = newtupleobject(RE_NREGS);
66 if (v != NULL) {
67 int i;
68 for (i = 0; i < RE_NREGS; i++) {
69 object *w, *u;
70 if ( (w = newtupleobject(2)) == NULL ||
71 (u = newintobject(regs->start[i])) == NULL ||
72 settupleitem(w, 0, u) != 0 ||
73 (u = newintobject(regs->end[i])) == NULL ||
74 settupleitem(w, 1, u) != 0) {
75 XDECREF(w);
76 DECREF(v);
77 return NULL;
78 }
79 settupleitem(v, i, w);
80 }
81 }
82 return v;
83}
84
85static object *
86reg_match(re, args)
87 regexobject *re;
88 object *args;
89{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000090 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000091 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000092 int offset;
93 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000094 if (getargs(args, "s#", &buffer, &size)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000095 offset = 0;
96 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +000097 else {
98 err_clear();
99 if (!getargs(args, "(s#i)", &buffer, &size, &offset))
100 return NULL;
101 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000102 re->re_regs_valid = 0;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000103 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000104 if (result < -1) {
105 /* Failure like stack overflow */
106 err_setstr(RegexError, "match failure");
107 return NULL;
108 }
109 re->re_regs_valid = result >= 0;
110 return newintobject((long)result); /* Length of the match or -1 */
111}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000112
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000113static object *
114reg_search(re, args)
115 regexobject *re;
116 object *args;
117{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000118 char *buffer;
119 int size;
120 int offset;
121 int range;
122 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000123
124 if (getargs(args, "s#", &buffer, &size)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000125 offset = 0;
126 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000127 else {
128 err_clear();
129 if (!getargs(args, "(s#i)", &buffer, &size, &offset))
130 return NULL;
131 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000132 if (offset < 0 || offset > size) {
133 err_setstr(RegexError, "search offset out of range");
134 return NULL;
135 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000136 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
137 the implementation don't match: the documentation states that
138 |range| positions are tried, while the code tries |range|+1
139 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000140 range = size - offset;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000141 re->re_regs_valid = 0;
142 result = re_search(&re->re_patbuf, buffer, size, offset, range,
143 &re->re_regs);
144 if (result < -1) {
145 /* Failure like stack overflow */
146 err_setstr(RegexError, "match failure");
147 return NULL;
148 }
149 re->re_regs_valid = result >= 0;
150 return newintobject((long)result); /* Position of the match or -1 */
151}
152
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000153static struct methodlist reg_methods[] = {
154 {"match", reg_match},
155 {"search", reg_search},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000156 {NULL, NULL} /* sentinel */
157};
158
159static object *
160reg_getattr(re, name)
161 regexobject *re;
162 char *name;
163{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000164 if (strcmp(name, "regs") == 0) {
165 if (!re->re_regs_valid) {
166 err_setstr(RegexError,
167 "regs only valid after successful match/search");
168 return NULL;
169 }
170 return makeresult(&re->re_regs);
171 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000172 return findmethod(reg_methods, (object *)re, name);
173}
174
175static typeobject Regextype = {
176 OB_HEAD_INIT(&Typetype)
177 0, /*ob_size*/
178 "regex", /*tp_name*/
179 sizeof(regexobject), /*tp_size*/
180 0, /*tp_itemsize*/
181 /* methods */
182 reg_dealloc, /*tp_dealloc*/
183 0, /*tp_print*/
184 reg_getattr, /*tp_getattr*/
185 0, /*tp_setattr*/
186 0, /*tp_compare*/
187 0, /*tp_repr*/
188};
189
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000190static object *
191newregexobject(pat, size)
192 char *pat;
193 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000194{
195 regexobject *re;
196 re = NEWOBJ(regexobject, &Regextype);
197 if (re != NULL) {
198 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000199 re->re_patbuf.buffer = NULL;
200 re->re_patbuf.allocated = 0;
201 re->re_patbuf.fastmap = re->re_fastmap;
202 re->re_patbuf.translate = NULL;
203 re->re_regs_valid = 0;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000204 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000205 if (error != NULL) {
206 err_setstr(RegexError, error);
207 DECREF(re);
208 re = NULL;
209 }
210 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000211 return (object *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000212}
213
214static object *
215regex_compile(self, args)
216 object *self;
217 object *args;
218{
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000219 char *pat;
220 int size;
221 if (!getargs(args, "s#", &pat, &size))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000222 return NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000223 return newregexobject(pat, size);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000224}
225
226static object *cache_pat;
227static object *cache_prog;
228
229static int
230update_cache(pat)
231 object *pat;
232{
233 if (pat != cache_pat) {
234 XDECREF(cache_pat);
235 cache_pat = NULL;
236 XDECREF(cache_prog);
237 cache_prog = regex_compile((object *)NULL, pat);
238 if (cache_prog == NULL)
239 return -1;
240 cache_pat = pat;
241 INCREF(cache_pat);
242 }
243 return 0;
244}
245
246static object *
247regex_match(self, args)
248 object *self;
249 object *args;
250{
251 object *pat, *string;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000252 if (!getStrStrarg(args, &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000253 return NULL;
254 if (update_cache(pat) < 0)
255 return NULL;
256 return reg_match((regexobject *)cache_prog, string);
257}
258
259static object *
260regex_search(self, args)
261 object *self;
262 object *args;
263{
264 object *pat, *string;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000265 if (!getStrStrarg(args, &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000266 return NULL;
267 if (update_cache(pat) < 0)
268 return NULL;
269 return reg_search((regexobject *)cache_prog, string);
270}
271
272static object *
273regex_set_syntax(self, args)
274 object *self, *args;
275{
276 int syntax;
277 if (!getintarg(args, &syntax))
278 return NULL;
279 syntax = re_set_syntax(syntax);
280 return newintobject((long)syntax);
281}
282
283static struct methodlist regex_global_methods[] = {
284 {"compile", regex_compile},
285 {"match", regex_match},
286 {"search", regex_search},
287 {"set_syntax", regex_set_syntax},
288 {NULL, NULL} /* sentinel */
289};
290
291initregex()
292{
293 object *m, *d;
294
295 m = initmodule("regex", regex_global_methods);
296 d = getmoduledict(m);
297
298 /* Initialize regex.error exception */
299 RegexError = newstringobject("regex.error");
300 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
301 fatal("can't define regex.error");
302}