/*
XXX support translate table
XXX support range parameter on search
XXX support mstop parameter on search
*/
| 6 | |
| 7 | /*********************************************************** |
| 8 | Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The |
| 9 | Netherlands. |
| 10 | |
| 11 | All Rights Reserved |
| 12 | |
| 13 | Permission to use, copy, modify, and distribute this software and its |
| 14 | documentation for any purpose and without fee is hereby granted, |
| 15 | provided that the above copyright notice appear in all copies and that |
| 16 | both that copyright notice and this permission notice appear in |
| 17 | supporting documentation, and that the names of Stichting Mathematisch |
| 18 | Centrum or CWI not be used in advertising or publicity pertaining to |
| 19 | distribution of the software without specific, written prior permission. |
| 20 | |
| 21 | STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO |
| 22 | THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 23 | FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE |
| 24 | FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 25 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 26 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
| 27 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 28 | |
| 29 | ******************************************************************/ |
| 30 | |
| 31 | /* Regular expression objects */ |
| 32 | /* This uses GNU regex.c, from subdirectory regex !!! */ |
| 33 | |
| 34 | #include "allobjects.h" |
| 35 | #include "modsupport.h" |
| 36 | |
| 37 | #include "regex.h" |
| 38 | |
| 39 | static object *RegexError; /* Exception */ |
| 40 | |
| 41 | typedef struct { |
| 42 | OB_HEAD |
| 43 | object *re_string; /* The string (for printing) */ |
| 44 | struct re_pattern_buffer re_patbuf; /* The compiled expression */ |
| 45 | struct re_registers re_regs; /* The registers from the last match */ |
| 46 | int re_regs_valid; /* Nonzero if the registers are valid */ |
| 47 | char re_fastmap[256]; /* Storage for fastmap */ |
| 48 | } regexobject; |
| 49 | |
| 50 | /* Regex object methods */ |
| 51 | |
| 52 | static void |
| 53 | reg_dealloc(re) |
| 54 | regexobject *re; |
| 55 | { |
| 56 | XDECREF(re->re_string); |
| 57 | XDEL(re->re_patbuf.buffer); |
| 58 | XDEL(re->re_patbuf.translate); |
| 59 | DEL(re); |
| 60 | } |
| 61 | |
| 62 | static object * |
| 63 | makeresult(regs) |
| 64 | struct re_registers *regs; |
| 65 | { |
| 66 | object *v = newtupleobject(RE_NREGS); |
| 67 | if (v != NULL) { |
| 68 | int i; |
| 69 | for (i = 0; i < RE_NREGS; i++) { |
| 70 | object *w, *u; |
| 71 | if ( (w = newtupleobject(2)) == NULL || |
| 72 | (u = newintobject(regs->start[i])) == NULL || |
| 73 | settupleitem(w, 0, u) != 0 || |
| 74 | (u = newintobject(regs->end[i])) == NULL || |
| 75 | settupleitem(w, 1, u) != 0) { |
| 76 | XDECREF(w); |
| 77 | DECREF(v); |
| 78 | return NULL; |
| 79 | } |
| 80 | settupleitem(v, i, w); |
| 81 | } |
| 82 | } |
| 83 | return v; |
| 84 | } |
| 85 | |
| 86 | static object * |
| 87 | reg_match(re, args) |
| 88 | regexobject *re; |
| 89 | object *args; |
| 90 | { |
| 91 | object *v; |
| 92 | char *buffer; |
| 93 | int offset; |
| 94 | int result; |
| 95 | if (args != NULL && is_stringobject(args)) { |
| 96 | v = args; |
| 97 | offset = 0; |
| 98 | } |
| 99 | else if (!getstrintarg(args, &v, &offset)) |
| 100 | return NULL; |
| 101 | buffer = getstringvalue(v); |
| 102 | re->re_regs_valid = 0; |
| 103 | result = re_match(&re->re_patbuf, buffer, getstringsize(v), |
| 104 | offset, &re->re_regs); |
| 105 | if (result < -1) { |
| 106 | /* Failure like stack overflow */ |
| 107 | err_setstr(RegexError, "match failure"); |
| 108 | return NULL; |
| 109 | } |
| 110 | re->re_regs_valid = result >= 0; |
| 111 | return newintobject((long)result); /* Length of the match or -1 */ |
| 112 | } |
| 113 | static object * |
| 114 | reg_search(re, args) |
| 115 | regexobject *re; |
| 116 | object *args; |
| 117 | { |
| 118 | object *v; |
| 119 | char *buffer; |
| 120 | int size; |
| 121 | int offset; |
| 122 | int range; |
| 123 | int result; |
| 124 | if (args != NULL && is_stringobject(args)) { |
| 125 | v = args; |
| 126 | offset = 0; |
| 127 | } |
| 128 | else if (!getstrintarg(args, &v, &offset)) |
| 129 | return NULL; |
| 130 | buffer = getstringvalue(v); |
| 131 | size = getstringsize(v); |
| 132 | if (offset < 0 || offset > size) { |
| 133 | err_setstr(RegexError, "search offset out of range"); |
| 134 | return NULL; |
| 135 | } |
| 136 | range = size - offset + 1; |
| 137 | re->re_regs_valid = 0; |
| 138 | result = re_search(&re->re_patbuf, buffer, size, offset, range, |
| 139 | &re->re_regs); |
| 140 | if (result < -1) { |
| 141 | /* Failure like stack overflow */ |
| 142 | err_setstr(RegexError, "match failure"); |
| 143 | return NULL; |
| 144 | } |
| 145 | re->re_regs_valid = result >= 0; |
| 146 | return newintobject((long)result); /* Position of the match or -1 */ |
| 147 | } |
| 148 | |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 149 | static struct methodlist reg_methods[] = { |
| 150 | {"match", reg_match}, |
| 151 | {"search", reg_search}, |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 152 | {NULL, NULL} /* sentinel */ |
| 153 | }; |
| 154 | |
| 155 | static object * |
| 156 | reg_getattr(re, name) |
| 157 | regexobject *re; |
| 158 | char *name; |
| 159 | { |
Guido van Rossum | b824fc6 | 1992-01-01 14:52:16 +0000 | [diff] [blame] | 160 | if (strcmp(name, "regs") == 0) { |
| 161 | if (!re->re_regs_valid) { |
| 162 | err_setstr(RegexError, |
| 163 | "regs only valid after successful match/search"); |
| 164 | return NULL; |
| 165 | } |
| 166 | return makeresult(&re->re_regs); |
| 167 | } |
Guido van Rossum | 6f4c43d | 1991-12-30 01:42:57 +0000 | [diff] [blame] | 168 | return findmethod(reg_methods, (object *)re, name); |
| 169 | } |
| 170 | |
| 171 | static typeobject Regextype = { |
| 172 | OB_HEAD_INIT(&Typetype) |
| 173 | 0, /*ob_size*/ |
| 174 | "regex", /*tp_name*/ |
| 175 | sizeof(regexobject), /*tp_size*/ |
| 176 | 0, /*tp_itemsize*/ |
| 177 | /* methods */ |
| 178 | reg_dealloc, /*tp_dealloc*/ |
| 179 | 0, /*tp_print*/ |
| 180 | reg_getattr, /*tp_getattr*/ |
| 181 | 0, /*tp_setattr*/ |
| 182 | 0, /*tp_compare*/ |
| 183 | 0, /*tp_repr*/ |
| 184 | }; |
| 185 | |
| 186 | static regexobject * |
| 187 | newregexobject(string) |
| 188 | object *string; |
| 189 | { |
| 190 | regexobject *re; |
| 191 | re = NEWOBJ(regexobject, &Regextype); |
| 192 | if (re != NULL) { |
| 193 | char *error; |
| 194 | INCREF(string); |
| 195 | re->re_string = string; |
| 196 | re->re_patbuf.buffer = NULL; |
| 197 | re->re_patbuf.allocated = 0; |
| 198 | re->re_patbuf.fastmap = re->re_fastmap; |
| 199 | re->re_patbuf.translate = NULL; |
| 200 | re->re_regs_valid = 0; |
| 201 | error = re_compile_pattern(getstringvalue(string), |
| 202 | getstringsize(string), |
| 203 | &re->re_patbuf); |
| 204 | if (error != NULL) { |
| 205 | err_setstr(RegexError, error); |
| 206 | DECREF(re); |
| 207 | re = NULL; |
| 208 | } |
| 209 | } |
| 210 | return re; |
| 211 | } |
| 212 | |
| 213 | static object * |
| 214 | regex_compile(self, args) |
| 215 | object *self; |
| 216 | object *args; |
| 217 | { |
| 218 | object *string; |
| 219 | if (!getstrarg(args, &string)) |
| 220 | return NULL; |
| 221 | return (object *)newregexobject(string); |
| 222 | } |
| 223 | |
| 224 | static object *cache_pat; |
| 225 | static object *cache_prog; |
| 226 | |
| 227 | static int |
| 228 | update_cache(pat) |
| 229 | object *pat; |
| 230 | { |
| 231 | if (pat != cache_pat) { |
| 232 | XDECREF(cache_pat); |
| 233 | cache_pat = NULL; |
| 234 | XDECREF(cache_prog); |
| 235 | cache_prog = regex_compile((object *)NULL, pat); |
| 236 | if (cache_prog == NULL) |
| 237 | return -1; |
| 238 | cache_pat = pat; |
| 239 | INCREF(cache_pat); |
| 240 | } |
| 241 | return 0; |
| 242 | } |
| 243 | |
| 244 | static object * |
| 245 | regex_match(self, args) |
| 246 | object *self; |
| 247 | object *args; |
| 248 | { |
| 249 | object *pat, *string; |
| 250 | if (!getstrstrarg(args, &pat, &string)) |
| 251 | return NULL; |
| 252 | if (update_cache(pat) < 0) |
| 253 | return NULL; |
| 254 | return reg_match((regexobject *)cache_prog, string); |
| 255 | } |
| 256 | |
| 257 | static object * |
| 258 | regex_search(self, args) |
| 259 | object *self; |
| 260 | object *args; |
| 261 | { |
| 262 | object *pat, *string; |
| 263 | if (!getstrstrarg(args, &pat, &string)) |
| 264 | return NULL; |
| 265 | if (update_cache(pat) < 0) |
| 266 | return NULL; |
| 267 | return reg_search((regexobject *)cache_prog, string); |
| 268 | } |
| 269 | |
| 270 | static object * |
| 271 | regex_set_syntax(self, args) |
| 272 | object *self, *args; |
| 273 | { |
| 274 | int syntax; |
| 275 | if (!getintarg(args, &syntax)) |
| 276 | return NULL; |
| 277 | syntax = re_set_syntax(syntax); |
| 278 | return newintobject((long)syntax); |
| 279 | } |
| 280 | |
| 281 | static struct methodlist regex_global_methods[] = { |
| 282 | {"compile", regex_compile}, |
| 283 | {"match", regex_match}, |
| 284 | {"search", regex_search}, |
| 285 | {"set_syntax", regex_set_syntax}, |
| 286 | {NULL, NULL} /* sentinel */ |
| 287 | }; |
| 288 | |
| 289 | initregex() |
| 290 | { |
| 291 | object *m, *d; |
| 292 | |
| 293 | m = initmodule("regex", regex_global_methods); |
| 294 | d = getmoduledict(m); |
| 295 | |
| 296 | /* Initialize regex.error exception */ |
| 297 | RegexError = newstringobject("regex.error"); |
| 298 | if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0) |
| 299 | fatal("can't define regex.error"); |
| 300 | } |