blob: 6afe3b7e06339be09232b99d0427fddc371f634c [file] [log] [blame]
/*
XXX support translate table
XXX support range parameter on search
XXX support mstop parameter on search
*/
6
/***********************************************************
Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
30
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35#include "allobjects.h"
36#include "modsupport.h"
37
Guido van Rossum1cab95c1992-01-19 16:31:57 +000038#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000039
40static object *RegexError; /* Exception */
41
42typedef struct {
43 OB_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044 struct re_pattern_buffer re_patbuf; /* The compiled expression */
45 struct re_registers re_regs; /* The registers from the last match */
46 int re_regs_valid; /* Nonzero if the registers are valid */
47 char re_fastmap[256]; /* Storage for fastmap */
48} regexobject;
49
/* Regex object methods */
51
52static void
53reg_dealloc(re)
54 regexobject *re;
55{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000056 XDEL(re->re_patbuf.buffer);
57 XDEL(re->re_patbuf.translate);
58 DEL(re);
59}
60
61static object *
62makeresult(regs)
63 struct re_registers *regs;
64{
65 object *v = newtupleobject(RE_NREGS);
66 if (v != NULL) {
67 int i;
68 for (i = 0; i < RE_NREGS; i++) {
69 object *w, *u;
70 if ( (w = newtupleobject(2)) == NULL ||
71 (u = newintobject(regs->start[i])) == NULL ||
72 settupleitem(w, 0, u) != 0 ||
73 (u = newintobject(regs->end[i])) == NULL ||
74 settupleitem(w, 1, u) != 0) {
75 XDECREF(w);
76 DECREF(v);
77 return NULL;
78 }
79 settupleitem(v, i, w);
80 }
81 }
82 return v;
83}
84
85static object *
86reg_match(re, args)
87 regexobject *re;
88 object *args;
89{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000090 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000091 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000092 int offset;
93 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000094 if (getargs(args, "s#", &buffer, &size)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000095 offset = 0;
96 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +000097 else {
98 err_clear();
99 if (!getargs(args, "(s#i)", &buffer, &size, &offset))
100 return NULL;
Guido van Rossum3d1e1461992-09-03 20:35:01 +0000101 if (offset < 0 || offset > size) {
102 err_setstr(RegexError, "match offset out of range");
103 return NULL;
104 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000105 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000106 re->re_regs_valid = 0;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000107 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000108 if (result < -1) {
109 /* Failure like stack overflow */
110 err_setstr(RegexError, "match failure");
111 return NULL;
112 }
113 re->re_regs_valid = result >= 0;
114 return newintobject((long)result); /* Length of the match or -1 */
115}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000116
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000117static object *
118reg_search(re, args)
119 regexobject *re;
120 object *args;
121{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000122 char *buffer;
123 int size;
124 int offset;
125 int range;
126 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000127
128 if (getargs(args, "s#", &buffer, &size)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000129 offset = 0;
130 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000131 else {
132 err_clear();
133 if (!getargs(args, "(s#i)", &buffer, &size, &offset))
134 return NULL;
Guido van Rossum3d1e1461992-09-03 20:35:01 +0000135 if (offset < 0 || offset > size) {
136 err_setstr(RegexError, "search offset out of range");
137 return NULL;
138 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000139 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000140 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
141 the implementation don't match: the documentation states that
142 |range| positions are tried, while the code tries |range|+1
143 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000144 range = size - offset;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000145 re->re_regs_valid = 0;
146 result = re_search(&re->re_patbuf, buffer, size, offset, range,
147 &re->re_regs);
148 if (result < -1) {
149 /* Failure like stack overflow */
150 err_setstr(RegexError, "match failure");
151 return NULL;
152 }
153 re->re_regs_valid = result >= 0;
154 return newintobject((long)result); /* Position of the match or -1 */
155}
156
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000157static struct methodlist reg_methods[] = {
158 {"match", reg_match},
159 {"search", reg_search},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000160 {NULL, NULL} /* sentinel */
161};
162
163static object *
164reg_getattr(re, name)
165 regexobject *re;
166 char *name;
167{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000168 if (strcmp(name, "regs") == 0) {
169 if (!re->re_regs_valid) {
170 err_setstr(RegexError,
171 "regs only valid after successful match/search");
172 return NULL;
173 }
174 return makeresult(&re->re_regs);
175 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000176 return findmethod(reg_methods, (object *)re, name);
177}
178
179static typeobject Regextype = {
180 OB_HEAD_INIT(&Typetype)
181 0, /*ob_size*/
182 "regex", /*tp_name*/
183 sizeof(regexobject), /*tp_size*/
184 0, /*tp_itemsize*/
185 /* methods */
186 reg_dealloc, /*tp_dealloc*/
187 0, /*tp_print*/
188 reg_getattr, /*tp_getattr*/
189 0, /*tp_setattr*/
190 0, /*tp_compare*/
191 0, /*tp_repr*/
192};
193
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000194static object *
195newregexobject(pat, size)
196 char *pat;
197 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000198{
199 regexobject *re;
200 re = NEWOBJ(regexobject, &Regextype);
201 if (re != NULL) {
202 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000203 re->re_patbuf.buffer = NULL;
204 re->re_patbuf.allocated = 0;
205 re->re_patbuf.fastmap = re->re_fastmap;
206 re->re_patbuf.translate = NULL;
207 re->re_regs_valid = 0;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000208 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000209 if (error != NULL) {
210 err_setstr(RegexError, error);
211 DECREF(re);
212 re = NULL;
213 }
214 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000215 return (object *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000216}
217
218static object *
219regex_compile(self, args)
220 object *self;
221 object *args;
222{
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000223 char *pat;
224 int size;
225 if (!getargs(args, "s#", &pat, &size))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000226 return NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000227 return newregexobject(pat, size);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000228}
229
230static object *cache_pat;
231static object *cache_prog;
232
233static int
234update_cache(pat)
235 object *pat;
236{
237 if (pat != cache_pat) {
238 XDECREF(cache_pat);
239 cache_pat = NULL;
240 XDECREF(cache_prog);
241 cache_prog = regex_compile((object *)NULL, pat);
242 if (cache_prog == NULL)
243 return -1;
244 cache_pat = pat;
245 INCREF(cache_pat);
246 }
247 return 0;
248}
249
250static object *
251regex_match(self, args)
252 object *self;
253 object *args;
254{
255 object *pat, *string;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000256 if (!getStrStrarg(args, &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000257 return NULL;
258 if (update_cache(pat) < 0)
259 return NULL;
260 return reg_match((regexobject *)cache_prog, string);
261}
262
263static object *
264regex_search(self, args)
265 object *self;
266 object *args;
267{
268 object *pat, *string;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000269 if (!getStrStrarg(args, &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000270 return NULL;
271 if (update_cache(pat) < 0)
272 return NULL;
273 return reg_search((regexobject *)cache_prog, string);
274}
275
276static object *
277regex_set_syntax(self, args)
278 object *self, *args;
279{
280 int syntax;
281 if (!getintarg(args, &syntax))
282 return NULL;
283 syntax = re_set_syntax(syntax);
284 return newintobject((long)syntax);
285}
286
287static struct methodlist regex_global_methods[] = {
288 {"compile", regex_compile},
289 {"match", regex_match},
290 {"search", regex_search},
291 {"set_syntax", regex_set_syntax},
292 {NULL, NULL} /* sentinel */
293};
294
295initregex()
296{
297 object *m, *d;
298
299 m = initmodule("regex", regex_global_methods);
300 d = getmoduledict(m);
301
302 /* Initialize regex.error exception */
303 RegexError = newstringobject("regex.error");
304 if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
305 fatal("can't define regex.error");
306}