Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 1 | /* |
| 2 | XXX support translate table |
| 3 | XXX support range parameter on search |
| 4 | XXX support mstop parameter on search |
| 5 | */ |
| 6 | |
| 7 | /*********************************************************** |
| 8 | Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The |
| 9 | Netherlands. |
| 10 | |
| 11 | All Rights Reserved |
| 12 | |
| 13 | Permission to use, copy, modify, and distribute this software and its |
| 14 | documentation for any purpose and without fee is hereby granted, |
| 15 | provided that the above copyright notice appear in all copies and that |
| 16 | both that copyright notice and this permission notice appear in |
| 17 | supporting documentation, and that the names of Stichting Mathematisch |
| 18 | Centrum or CWI not be used in advertising or publicity pertaining to |
| 19 | distribution of the software without specific, written prior permission. |
| 20 | |
| 21 | STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO |
| 22 | THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 23 | FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE |
| 24 | FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 25 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 26 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
| 27 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 28 | |
| 29 | ******************************************************************/ |
| 30 | |
| 31 | /* Regular expression objects */ |
Guido van Rossum | 1cab95c | 1992-01-19 16:31:57 +0000 | [diff] [blame] | 32 | /* This uses Tatu Ylonen's copyleft-free reimplementation of |
| 33 | GNU regular expressions */ |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 34 | |
| 35 | #include "allobjects.h" |
| 36 | #include "modsupport.h" |
| 37 | |
Guido van Rossum | 1cab95c | 1992-01-19 16:31:57 +0000 | [diff] [blame] | 38 | #include "regexpr.h" |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 39 | |
| 40 | static object *RegexError; /* Exception */ |
| 41 | |
| 42 | typedef struct { |
| 43 | OB_HEAD |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 44 | struct re_pattern_buffer re_patbuf; /* The compiled expression */ |
| 45 | struct re_registers re_regs; /* The registers from the last match */ |
| 46 | int re_regs_valid; /* Nonzero if the registers are valid */ |
| 47 | char re_fastmap[256]; /* Storage for fastmap */ |
| 48 | } regexobject; |
| 49 | |
| 50 | /* Regex object methods */ |
| 51 | |
| 52 | static void |
| 53 | reg_dealloc(re) |
| 54 | regexobject *re; |
| 55 | { |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 56 | XDEL(re->re_patbuf.buffer); |
| 57 | XDEL(re->re_patbuf.translate); |
| 58 | DEL(re); |
| 59 | } |
| 60 | |
| 61 | static object * |
| 62 | makeresult(regs) |
| 63 | struct re_registers *regs; |
| 64 | { |
| 65 | object *v = newtupleobject(RE_NREGS); |
| 66 | if (v != NULL) { |
| 67 | int i; |
| 68 | for (i = 0; i < RE_NREGS; i++) { |
| 69 | object *w, *u; |
| 70 | if ( (w = newtupleobject(2)) == NULL || |
| 71 | (u = newintobject(regs->start[i])) == NULL || |
| 72 | settupleitem(w, 0, u) != 0 || |
| 73 | (u = newintobject(regs->end[i])) == NULL || |
| 74 | settupleitem(w, 1, u) != 0) { |
| 75 | XDECREF(w); |
| 76 | DECREF(v); |
| 77 | return NULL; |
| 78 | } |
| 79 | settupleitem(v, i, w); |
| 80 | } |
| 81 | } |
| 82 | return v; |
| 83 | } |
| 84 | |
| 85 | static object * |
| 86 | reg_match(re, args) |
| 87 | regexobject *re; |
| 88 | object *args; |
| 89 | { |
| 90 | object *v; |
| 91 | char *buffer; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 92 | int size; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 93 | int offset; |
| 94 | int result; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 95 | if (getargs(args, "s#", &buffer, &size)) { |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 96 | offset = 0; |
| 97 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 98 | else { |
| 99 | err_clear(); |
| 100 | if (!getargs(args, "(s#i)", &buffer, &size, &offset)) |
| 101 | return NULL; |
| 102 | } |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 103 | re->re_regs_valid = 0; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 104 | result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs); |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 105 | if (result < -1) { |
| 106 | /* Failure like stack overflow */ |
| 107 | err_setstr(RegexError, "match failure"); |
| 108 | return NULL; |
| 109 | } |
| 110 | re->re_regs_valid = result >= 0; |
| 111 | return newintobject((long)result); /* Length of the match or -1 */ |
| 112 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 113 | |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 114 | static object * |
| 115 | reg_search(re, args) |
| 116 | regexobject *re; |
| 117 | object *args; |
| 118 | { |
| 119 | object *v; |
| 120 | char *buffer; |
| 121 | int size; |
| 122 | int offset; |
| 123 | int range; |
| 124 | int result; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 125 | |
| 126 | if (getargs(args, "s#", &buffer, &size)) { |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 127 | offset = 0; |
| 128 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 129 | else { |
| 130 | err_clear(); |
| 131 | if (!getargs(args, "(s#i)", &buffer, &size, &offset)) |
| 132 | return NULL; |
| 133 | } |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 134 | if (offset < 0 || offset > size) { |
| 135 | err_setstr(RegexError, "search offset out of range"); |
| 136 | return NULL; |
| 137 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 138 | /* NB: In Emacs 18.57, the documentation for re_search[_2] and |
| 139 | the implementation don't match: the documentation states that |
| 140 | |range| positions are tried, while the code tries |range|+1 |
| 141 | positions. It seems more productive to believe the code! */ |
Guido van Rossum | 2d78590 | 1992-01-26 18:12:41 +0000 | [diff] [blame] | 142 | range = size - offset; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 143 | re->re_regs_valid = 0; |
| 144 | result = re_search(&re->re_patbuf, buffer, size, offset, range, |
| 145 | &re->re_regs); |
| 146 | if (result < -1) { |
| 147 | /* Failure like stack overflow */ |
| 148 | err_setstr(RegexError, "match failure"); |
| 149 | return NULL; |
| 150 | } |
| 151 | re->re_regs_valid = result >= 0; |
| 152 | return newintobject((long)result); /* Position of the match or -1 */ |
| 153 | } |
| 154 | |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 155 | static struct methodlist reg_methods[] = { |
| 156 | {"match", reg_match}, |
| 157 | {"search", reg_search}, |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 158 | {NULL, NULL} /* sentinel */ |
| 159 | }; |
| 160 | |
| 161 | static object * |
| 162 | reg_getattr(re, name) |
| 163 | regexobject *re; |
| 164 | char *name; |
| 165 | { |
Guido van Rossum | b824fc6 | 1992-01-01 14:52:16 +0000 | [diff] [blame] | 166 | if (strcmp(name, "regs") == 0) { |
| 167 | if (!re->re_regs_valid) { |
| 168 | err_setstr(RegexError, |
| 169 | "regs only valid after successful match/search"); |
| 170 | return NULL; |
| 171 | } |
| 172 | return makeresult(&re->re_regs); |
| 173 | } |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 174 | return findmethod(reg_methods, (object *)re, name); |
| 175 | } |
| 176 | |
| 177 | static typeobject Regextype = { |
| 178 | OB_HEAD_INIT(&Typetype) |
| 179 | 0, /*ob_size*/ |
| 180 | "regex", /*tp_name*/ |
| 181 | sizeof(regexobject), /*tp_size*/ |
| 182 | 0, /*tp_itemsize*/ |
| 183 | /* methods */ |
| 184 | reg_dealloc, /*tp_dealloc*/ |
| 185 | 0, /*tp_print*/ |
| 186 | reg_getattr, /*tp_getattr*/ |
| 187 | 0, /*tp_setattr*/ |
| 188 | 0, /*tp_compare*/ |
| 189 | 0, /*tp_repr*/ |
| 190 | }; |
| 191 | |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 192 | static object * |
| 193 | newregexobject(pat, size) |
| 194 | char *pat; |
| 195 | int size; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 196 | { |
| 197 | regexobject *re; |
| 198 | re = NEWOBJ(regexobject, &Regextype); |
| 199 | if (re != NULL) { |
| 200 | char *error; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 201 | re->re_patbuf.buffer = NULL; |
| 202 | re->re_patbuf.allocated = 0; |
| 203 | re->re_patbuf.fastmap = re->re_fastmap; |
| 204 | re->re_patbuf.translate = NULL; |
| 205 | re->re_regs_valid = 0; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 206 | error = re_compile_pattern(pat, size, &re->re_patbuf); |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 207 | if (error != NULL) { |
| 208 | err_setstr(RegexError, error); |
| 209 | DECREF(re); |
| 210 | re = NULL; |
| 211 | } |
| 212 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 213 | return (object *)re; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 214 | } |
| 215 | |
| 216 | static object * |
| 217 | regex_compile(self, args) |
| 218 | object *self; |
| 219 | object *args; |
| 220 | { |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 221 | char *pat; |
| 222 | int size; |
| 223 | if (!getargs(args, "s#", &pat, &size)) |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 224 | return NULL; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 225 | return newregexobject(pat, size); |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 226 | } |
| 227 | |
| 228 | static object *cache_pat; |
| 229 | static object *cache_prog; |
| 230 | |
| 231 | static int |
| 232 | update_cache(pat) |
| 233 | object *pat; |
| 234 | { |
| 235 | if (pat != cache_pat) { |
| 236 | XDECREF(cache_pat); |
| 237 | cache_pat = NULL; |
| 238 | XDECREF(cache_prog); |
| 239 | cache_prog = regex_compile((object *)NULL, pat); |
| 240 | if (cache_prog == NULL) |
| 241 | return -1; |
| 242 | cache_pat = pat; |
| 243 | INCREF(cache_pat); |
| 244 | } |
| 245 | return 0; |
| 246 | } |
| 247 | |
| 248 | static object * |
| 249 | regex_match(self, args) |
| 250 | object *self; |
| 251 | object *args; |
| 252 | { |
| 253 | object *pat, *string; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 254 | if (!getStrStrarg(args, &pat, &string)) |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 255 | return NULL; |
| 256 | if (update_cache(pat) < 0) |
| 257 | return NULL; |
| 258 | return reg_match((regexobject *)cache_prog, string); |
| 259 | } |
| 260 | |
| 261 | static object * |
| 262 | regex_search(self, args) |
| 263 | object *self; |
| 264 | object *args; |
| 265 | { |
| 266 | object *pat, *string; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 267 | if (!getStrStrarg(args, &pat, &string)) |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 268 | return NULL; |
| 269 | if (update_cache(pat) < 0) |
| 270 | return NULL; |
| 271 | return reg_search((regexobject *)cache_prog, string); |
| 272 | } |
| 273 | |
| 274 | static object * |
| 275 | regex_set_syntax(self, args) |
| 276 | object *self, *args; |
| 277 | { |
| 278 | int syntax; |
| 279 | if (!getintarg(args, &syntax)) |
| 280 | return NULL; |
| 281 | syntax = re_set_syntax(syntax); |
| 282 | return newintobject((long)syntax); |
| 283 | } |
| 284 | |
| 285 | static struct methodlist regex_global_methods[] = { |
| 286 | {"compile", regex_compile}, |
| 287 | {"match", regex_match}, |
| 288 | {"search", regex_search}, |
| 289 | {"set_syntax", regex_set_syntax}, |
| 290 | {NULL, NULL} /* sentinel */ |
| 291 | }; |
| 292 | |
| 293 | initregex() |
| 294 | { |
| 295 | object *m, *d; |
| 296 | |
| 297 | m = initmodule("regex", regex_global_methods); |
| 298 | d = getmoduledict(m); |
| 299 | |
| 300 | /* Initialize regex.error exception */ |
| 301 | RegexError = newstringobject("regex.error"); |
| 302 | if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0) |
| 303 | fatal("can't define regex.error"); |
| 304 | } |