Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 1 | /* |
| 2 | XXX support translate table |
| 3 | XXX support range parameter on search |
| 4 | XXX support mstop parameter on search |
| 5 | */ |
| 6 | |
| 7 | /*********************************************************** |
| 8 | Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The |
| 9 | Netherlands. |
| 10 | |
| 11 | All Rights Reserved |
| 12 | |
| 13 | Permission to use, copy, modify, and distribute this software and its |
| 14 | documentation for any purpose and without fee is hereby granted, |
| 15 | provided that the above copyright notice appear in all copies and that |
| 16 | both that copyright notice and this permission notice appear in |
| 17 | supporting documentation, and that the names of Stichting Mathematisch |
| 18 | Centrum or CWI not be used in advertising or publicity pertaining to |
| 19 | distribution of the software without specific, written prior permission. |
| 20 | |
| 21 | STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO |
| 22 | THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 23 | FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE |
| 24 | FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 25 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 26 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
| 27 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 28 | |
| 29 | ******************************************************************/ |
| 30 | |
| 31 | /* Regular expression objects */ |
Guido van Rossum | 1cab95c | 1992-01-19 16:31:57 +0000 | [diff] [blame] | 32 | /* This uses Tatu Ylonen's copyleft-free reimplementation of |
| 33 | GNU regular expressions */ |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 34 | |
| 35 | #include "allobjects.h" |
| 36 | #include "modsupport.h" |
| 37 | |
Guido van Rossum | 1cab95c | 1992-01-19 16:31:57 +0000 | [diff] [blame] | 38 | #include "regexpr.h" |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 39 | |
| 40 | static object *RegexError; /* Exception */ |
| 41 | |
| 42 | typedef struct { |
| 43 | OB_HEAD |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 44 | struct re_pattern_buffer re_patbuf; /* The compiled expression */ |
| 45 | struct re_registers re_regs; /* The registers from the last match */ |
| 46 | int re_regs_valid; /* Nonzero if the registers are valid */ |
| 47 | char re_fastmap[256]; /* Storage for fastmap */ |
| 48 | } regexobject; |
| 49 | |
| 50 | /* Regex object methods */ |
| 51 | |
| 52 | static void |
| 53 | reg_dealloc(re) |
| 54 | regexobject *re; |
| 55 | { |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 56 | XDEL(re->re_patbuf.buffer); |
| 57 | XDEL(re->re_patbuf.translate); |
| 58 | DEL(re); |
| 59 | } |
| 60 | |
| 61 | static object * |
| 62 | makeresult(regs) |
| 63 | struct re_registers *regs; |
| 64 | { |
| 65 | object *v = newtupleobject(RE_NREGS); |
| 66 | if (v != NULL) { |
| 67 | int i; |
| 68 | for (i = 0; i < RE_NREGS; i++) { |
| 69 | object *w, *u; |
| 70 | if ( (w = newtupleobject(2)) == NULL || |
| 71 | (u = newintobject(regs->start[i])) == NULL || |
| 72 | settupleitem(w, 0, u) != 0 || |
| 73 | (u = newintobject(regs->end[i])) == NULL || |
| 74 | settupleitem(w, 1, u) != 0) { |
| 75 | XDECREF(w); |
| 76 | DECREF(v); |
| 77 | return NULL; |
| 78 | } |
| 79 | settupleitem(v, i, w); |
| 80 | } |
| 81 | } |
| 82 | return v; |
| 83 | } |
| 84 | |
| 85 | static object * |
| 86 | reg_match(re, args) |
| 87 | regexobject *re; |
| 88 | object *args; |
| 89 | { |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 90 | char *buffer; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 91 | int size; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 92 | int offset; |
| 93 | int result; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 94 | if (getargs(args, "s#", &buffer, &size)) { |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 95 | offset = 0; |
| 96 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 97 | else { |
| 98 | err_clear(); |
| 99 | if (!getargs(args, "(s#i)", &buffer, &size, &offset)) |
| 100 | return NULL; |
| 101 | } |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 102 | re->re_regs_valid = 0; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 103 | result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs); |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 104 | if (result < -1) { |
| 105 | /* Failure like stack overflow */ |
| 106 | err_setstr(RegexError, "match failure"); |
| 107 | return NULL; |
| 108 | } |
| 109 | re->re_regs_valid = result >= 0; |
| 110 | return newintobject((long)result); /* Length of the match or -1 */ |
| 111 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 112 | |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 113 | static object * |
| 114 | reg_search(re, args) |
| 115 | regexobject *re; |
| 116 | object *args; |
| 117 | { |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 118 | char *buffer; |
| 119 | int size; |
| 120 | int offset; |
| 121 | int range; |
| 122 | int result; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 123 | |
| 124 | if (getargs(args, "s#", &buffer, &size)) { |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 125 | offset = 0; |
| 126 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 127 | else { |
| 128 | err_clear(); |
| 129 | if (!getargs(args, "(s#i)", &buffer, &size, &offset)) |
| 130 | return NULL; |
| 131 | } |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 132 | if (offset < 0 || offset > size) { |
| 133 | err_setstr(RegexError, "search offset out of range"); |
| 134 | return NULL; |
| 135 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 136 | /* NB: In Emacs 18.57, the documentation for re_search[_2] and |
| 137 | the implementation don't match: the documentation states that |
| 138 | |range| positions are tried, while the code tries |range|+1 |
| 139 | positions. It seems more productive to believe the code! */ |
Guido van Rossum | 2d78590 | 1992-01-26 18:12:41 +0000 | [diff] [blame] | 140 | range = size - offset; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 141 | re->re_regs_valid = 0; |
| 142 | result = re_search(&re->re_patbuf, buffer, size, offset, range, |
| 143 | &re->re_regs); |
| 144 | if (result < -1) { |
| 145 | /* Failure like stack overflow */ |
| 146 | err_setstr(RegexError, "match failure"); |
| 147 | return NULL; |
| 148 | } |
| 149 | re->re_regs_valid = result >= 0; |
| 150 | return newintobject((long)result); /* Position of the match or -1 */ |
| 151 | } |
| 152 | |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 153 | static struct methodlist reg_methods[] = { |
| 154 | {"match", reg_match}, |
| 155 | {"search", reg_search}, |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 156 | {NULL, NULL} /* sentinel */ |
| 157 | }; |
| 158 | |
| 159 | static object * |
| 160 | reg_getattr(re, name) |
| 161 | regexobject *re; |
| 162 | char *name; |
| 163 | { |
Guido van Rossum | b824fc6 | 1992-01-01 14:52:16 +0000 | [diff] [blame] | 164 | if (strcmp(name, "regs") == 0) { |
| 165 | if (!re->re_regs_valid) { |
| 166 | err_setstr(RegexError, |
| 167 | "regs only valid after successful match/search"); |
| 168 | return NULL; |
| 169 | } |
| 170 | return makeresult(&re->re_regs); |
| 171 | } |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 172 | return findmethod(reg_methods, (object *)re, name); |
| 173 | } |
| 174 | |
| 175 | static typeobject Regextype = { |
| 176 | OB_HEAD_INIT(&Typetype) |
| 177 | 0, /*ob_size*/ |
| 178 | "regex", /*tp_name*/ |
| 179 | sizeof(regexobject), /*tp_size*/ |
| 180 | 0, /*tp_itemsize*/ |
| 181 | /* methods */ |
| 182 | reg_dealloc, /*tp_dealloc*/ |
| 183 | 0, /*tp_print*/ |
| 184 | reg_getattr, /*tp_getattr*/ |
| 185 | 0, /*tp_setattr*/ |
| 186 | 0, /*tp_compare*/ |
| 187 | 0, /*tp_repr*/ |
| 188 | }; |
| 189 | |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 190 | static object * |
| 191 | newregexobject(pat, size) |
| 192 | char *pat; |
| 193 | int size; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 194 | { |
| 195 | regexobject *re; |
| 196 | re = NEWOBJ(regexobject, &Regextype); |
| 197 | if (re != NULL) { |
| 198 | char *error; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 199 | re->re_patbuf.buffer = NULL; |
| 200 | re->re_patbuf.allocated = 0; |
| 201 | re->re_patbuf.fastmap = re->re_fastmap; |
| 202 | re->re_patbuf.translate = NULL; |
| 203 | re->re_regs_valid = 0; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 204 | error = re_compile_pattern(pat, size, &re->re_patbuf); |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 205 | if (error != NULL) { |
| 206 | err_setstr(RegexError, error); |
| 207 | DECREF(re); |
| 208 | re = NULL; |
| 209 | } |
| 210 | } |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 211 | return (object *)re; |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 212 | } |
| 213 | |
| 214 | static object * |
| 215 | regex_compile(self, args) |
| 216 | object *self; |
| 217 | object *args; |
| 218 | { |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 219 | char *pat; |
| 220 | int size; |
| 221 | if (!getargs(args, "s#", &pat, &size)) |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 222 | return NULL; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 223 | return newregexobject(pat, size); |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 224 | } |
| 225 | |
| 226 | static object *cache_pat; |
| 227 | static object *cache_prog; |
| 228 | |
| 229 | static int |
| 230 | update_cache(pat) |
| 231 | object *pat; |
| 232 | { |
| 233 | if (pat != cache_pat) { |
| 234 | XDECREF(cache_pat); |
| 235 | cache_pat = NULL; |
| 236 | XDECREF(cache_prog); |
| 237 | cache_prog = regex_compile((object *)NULL, pat); |
| 238 | if (cache_prog == NULL) |
| 239 | return -1; |
| 240 | cache_pat = pat; |
| 241 | INCREF(cache_pat); |
| 242 | } |
| 243 | return 0; |
| 244 | } |
| 245 | |
| 246 | static object * |
| 247 | regex_match(self, args) |
| 248 | object *self; |
| 249 | object *args; |
| 250 | { |
| 251 | object *pat, *string; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 252 | if (!getStrStrarg(args, &pat, &string)) |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 253 | return NULL; |
| 254 | if (update_cache(pat) < 0) |
| 255 | return NULL; |
| 256 | return reg_match((regexobject *)cache_prog, string); |
| 257 | } |
| 258 | |
| 259 | static object * |
| 260 | regex_search(self, args) |
| 261 | object *self; |
| 262 | object *args; |
| 263 | { |
| 264 | object *pat, *string; |
Guido van Rossum | d577c0c | 1992-01-27 16:46:19 +0000 | [diff] [blame] | 265 | if (!getStrStrarg(args, &pat, &string)) |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 266 | return NULL; |
| 267 | if (update_cache(pat) < 0) |
| 268 | return NULL; |
| 269 | return reg_search((regexobject *)cache_prog, string); |
| 270 | } |
| 271 | |
| 272 | static object * |
| 273 | regex_set_syntax(self, args) |
| 274 | object *self, *args; |
| 275 | { |
| 276 | int syntax; |
| 277 | if (!getintarg(args, &syntax)) |
| 278 | return NULL; |
| 279 | syntax = re_set_syntax(syntax); |
| 280 | return newintobject((long)syntax); |
| 281 | } |
| 282 | |
| 283 | static struct methodlist regex_global_methods[] = { |
| 284 | {"compile", regex_compile}, |
| 285 | {"match", regex_match}, |
| 286 | {"search", regex_search}, |
| 287 | {"set_syntax", regex_set_syntax}, |
| 288 | {NULL, NULL} /* sentinel */ |
| 289 | }; |
| 290 | |
| 291 | initregex() |
| 292 | { |
| 293 | object *m, *d; |
| 294 | |
| 295 | m = initmodule("regex", regex_global_methods); |
| 296 | d = getmoduledict(m); |
| 297 | |
| 298 | /* Initialize regex.error exception */ |
| 299 | RegexError = newstringobject("regex.error"); |
| 300 | if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0) |
| 301 | fatal("can't define regex.error"); |
| 302 | } |