blob: 7a289d5c4ecd05841e68d1b125312eaccd341b15 [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
36
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000040
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042
Guido van Rossuma376cc51996-12-05 23:43:35 +000043#include <ctype.h>
44
Guido van Rossum1cab95c1992-01-19 16:31:57 +000045#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000046
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000047static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048
49typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000050 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000051 struct re_pattern_buffer re_patbuf; /* The compiled expression */
52 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000053 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000054 PyObject *re_translate; /* String object for translate table */
55 PyObject *re_lastok; /* String object last matched/searched */
56 PyObject *re_groupindex; /* Group name to index dictionary */
57 PyObject *re_givenpat; /* Pattern with symbolic groups */
58 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000059} regexobject;
60
61/* Regex object methods */
62
63static void
64reg_dealloc(re)
65 regexobject *re;
66{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000067 PyMem_XDEL(re->re_patbuf.buffer);
68 Py_XDECREF(re->re_translate);
69 Py_XDECREF(re->re_lastok);
70 Py_XDECREF(re->re_groupindex);
71 Py_XDECREF(re->re_givenpat);
72 Py_XDECREF(re->re_realpat);
73 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000074}
75
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000076static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000077makeresult(regs)
78 struct re_registers *regs;
79{
Guido van Rossumc1962021996-10-08 14:18:42 +000080 PyObject *v;
81 int i;
82 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000083
Guido van Rossumc1962021996-10-08 14:18:42 +000084 if (filler == NULL) {
85 filler = Py_BuildValue("(ii)", -1, -1);
86 if (filler == NULL)
87 return NULL;
88 }
89 v = PyTuple_New(RE_NREGS);
90 if (v == NULL)
91 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000092
Guido van Rossumc1962021996-10-08 14:18:42 +000093 for (i = 0; i < RE_NREGS; i++) {
94 int lo = regs->start[i];
95 int hi = regs->end[i];
96 PyObject *w;
97 if (lo == -1 && hi == -1) {
98 w = filler;
99 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000100 }
Guido van Rossumc1962021996-10-08 14:18:42 +0000101 else
102 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +0000103 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
104 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +0000105 return NULL;
106 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000107 }
108 return v;
109}
110
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000111static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000112regobj_match(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000113 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000114 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000115{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000116 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000117 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000118 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000119 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000120 int result;
Barry Warsawc3573251996-12-20 21:56:07 +0000121
Guido van Rossum4a807f51997-05-12 16:04:09 +0000122 if (!PyArg_ParseTuple(args, "O|i", &argstring, &offset))
123 return NULL;
124 if (!PyArg_Parse(argstring, "s#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000125 return NULL;
126
Guido van Rossum36d330b1993-02-21 20:12:16 +0000127 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000128 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000129 return NULL;
130 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000131 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000132 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000133 result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
Guido van Rossum0318bd61997-08-14 14:35:12 +0000134 &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000135 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000136 /* Serious failure of some sort; if re_match didn't
137 set an exception, raise a generic error */
138 if (!PyErr_Occurred())
139 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000140 return NULL;
141 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000142 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000143 Py_INCREF(argstring);
144 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000145 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000146 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000147}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000148
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000149static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000150regobj_search(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000151 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000152 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000153{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000154 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000155 char *buffer;
156 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000157 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000158 int range;
159 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000160
Guido van Rossum4a807f51997-05-12 16:04:09 +0000161 if (!PyArg_ParseTuple(args, "O|i", &argstring, &offset))
162 return NULL;
163 if (!PyArg_Parse(argstring, "s#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000164 return NULL;
165
Guido van Rossum36d330b1993-02-21 20:12:16 +0000166 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000167 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000168 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000169 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000170 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
171 the implementation don't match: the documentation states that
172 |range| positions are tried, while the code tries |range|+1
173 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000174 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000175 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000176 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000177 result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000178 &re->re_regs);
179 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000180 /* Serious failure of some sort; if re_match didn't
181 set an exception, raise a generic error */
182 if (!PyErr_Occurred())
183 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000184 return NULL;
185 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000186 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000187 Py_INCREF(argstring);
188 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000189 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000190 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000191}
192
Barry Warsawc3573251996-12-20 21:56:07 +0000193/* get the group from the regex where index can be a string (group name) or
194 an integer index [0 .. 99]
195 */
196static PyObject*
197group_from_index(re, index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000198 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000199 PyObject *index;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000200{
201 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000202 char *v;
203
204 if (PyString_Check(index))
205 if (re->re_groupindex == NULL ||
206 !(index = PyDict_GetItem(re->re_groupindex, index)))
207 {
208 PyErr_SetString(RegexError,
209 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000210 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000211 }
Barry Warsawc3573251996-12-20 21:56:07 +0000212
213 i = PyInt_AsLong(index);
214 if (i == -1 && PyErr_Occurred())
215 return NULL;
216
Guido van Rossum36d330b1993-02-21 20:12:16 +0000217 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000218 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000219 return NULL;
220 }
221 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000222 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000223 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000224 return NULL;
225 }
226 a = re->re_regs.start[i];
227 b = re->re_regs.end[i];
228 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000229 Py_INCREF(Py_None);
230 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000231 }
Barry Warsawc3573251996-12-20 21:56:07 +0000232
233 if (!(v = PyString_AsString(re->re_lastok)))
234 return NULL;
235
236 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000237}
238
Barry Warsawc3573251996-12-20 21:56:07 +0000239
240static PyObject *
241regobj_group(re, args)
242 regexobject *re;
243 PyObject *args;
244{
245 int n = PyTuple_Size(args);
246 int i;
247 PyObject *res = NULL;
248
249 if (n < 0)
250 return NULL;
251 if (n == 0) {
252 PyErr_SetString(PyExc_TypeError, "not enough arguments");
253 return NULL;
254 }
255 if (n == 1) {
256 /* return value is a single string */
257 PyObject *index = PyTuple_GetItem(args, 0);
258 if (!index)
259 return NULL;
260
261 return group_from_index(re, index);
262 }
263
264 /* return value is a tuple */
265 if (!(res = PyTuple_New(n)))
266 return NULL;
267
268 for (i = 0; i < n; i++) {
269 PyObject *index = PyTuple_GetItem(args, i);
270 PyObject *group = NULL;
271
272 if (!index)
273 goto finally;
274 if (!(group = group_from_index(re, index)))
275 goto finally;
276 if (PyTuple_SetItem(res, i, group) < 0)
277 goto finally;
278 }
279 return res;
280
281 finally:
282 Py_DECREF(res);
283 return NULL;
284}
285
286
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000287static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000288 {"match", (PyCFunction)regobj_match, 1},
289 {"search", (PyCFunction)regobj_search, 1},
290 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000291 {NULL, NULL} /* sentinel */
292};
293
Barry Warsawc3573251996-12-20 21:56:07 +0000294
295
296static char* members[] = {
297 "last", "regs", "translate",
298 "groupindex", "realpat", "givenpat",
299 NULL
300};
301
302
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000303static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000304regobj_getattr(re, name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000305 regexobject *re;
306 char *name;
307{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000308 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000309 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000310 Py_INCREF(Py_None);
311 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000312 }
313 return makeresult(&re->re_regs);
314 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000315 if (strcmp(name, "last") == 0) {
316 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000317 Py_INCREF(Py_None);
318 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000319 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000320 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000321 return re->re_lastok;
322 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000323 if (strcmp(name, "translate") == 0) {
324 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000325 Py_INCREF(Py_None);
326 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000327 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000328 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000329 return re->re_translate;
330 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000331 if (strcmp(name, "groupindex") == 0) {
332 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000333 Py_INCREF(Py_None);
334 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000335 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000336 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000337 return re->re_groupindex;
338 }
339 if (strcmp(name, "realpat") == 0) {
340 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000341 Py_INCREF(Py_None);
342 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000343 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000344 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000345 return re->re_realpat;
346 }
347 if (strcmp(name, "givenpat") == 0) {
348 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000349 Py_INCREF(Py_None);
350 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000351 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000352 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000353 return re->re_givenpat;
354 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000355 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000356 int i = 0;
357 PyObject *list = NULL;
358
359 /* okay, so it's unlikely this list will change that often.
360 still, it's easier to change it in just one place.
361 */
362 while (members[i])
363 i++;
364 if (!(list = PyList_New(i)))
365 return NULL;
366
367 i = 0;
368 while (members[i]) {
369 PyObject* v = PyString_FromString(members[i]);
370 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000371 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000372 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000373 }
Barry Warsawc3573251996-12-20 21:56:07 +0000374 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000375 }
376 return list;
377 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000378 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000379}
380
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000381static PyTypeObject Regextype = {
382 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000383 0, /*ob_size*/
384 "regex", /*tp_name*/
385 sizeof(regexobject), /*tp_size*/
386 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000387 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000388 (destructor)reg_dealloc, /*tp_dealloc*/
389 0, /*tp_print*/
390 (getattrfunc)regobj_getattr, /*tp_getattr*/
391 0, /*tp_setattr*/
392 0, /*tp_compare*/
393 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000394};
395
Barry Warsawc3573251996-12-20 21:56:07 +0000396/* reference counting invariants:
397 pattern: borrowed
398 translate: borrowed
399 givenpat: borrowed
400 groupindex: transferred
401*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000402static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000403newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000404 PyObject *pattern;
405 PyObject *translate;
406 PyObject *givenpat;
407 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000408{
409 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000410 char *pat;
411 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000412
Barry Warsawc3573251996-12-20 21:56:07 +0000413 if (!PyArg_Parse(pattern, "s#", &pat, &size))
414 return NULL;
415
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000416 if (translate != NULL && PyString_Size(translate) != 256) {
417 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000418 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000419 return NULL;
420 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000421 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000422 if (re != NULL) {
423 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000424 re->re_patbuf.buffer = NULL;
425 re->re_patbuf.allocated = 0;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000426 re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000427 if (translate) {
Guido van Rossumed2554a1997-08-18 15:31:24 +0000428 re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000429 if (!re->re_patbuf.translate)
430 goto finally;
431 Py_INCREF(translate);
432 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000433 else
434 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000435 re->re_translate = translate;
436 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000437 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000438 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000439 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000440 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000441 re->re_givenpat = givenpat;
Guido van Rossumd19c04a1997-09-03 00:47:36 +0000442 error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000443 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000444 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000445 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000446 }
447 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000448 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000449 finally:
450 Py_DECREF(re);
451 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000452}
453
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000454static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000455regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000456 PyObject *self;
457 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000458{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000459 PyObject *pat = NULL;
460 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000461
462 if (!PyArg_ParseTuple(args, "S|S", &pat, &tran))
463 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000464 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000465}
466
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000467static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000468symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000469 PyObject *pattern;
470 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000471{
Barry Warsawc3573251996-12-20 21:56:07 +0000472 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000473 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000474 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000475 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000476 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000477 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000478 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
479
Barry Warsawc3573251996-12-20 21:56:07 +0000480 if (!(opat = PyString_AsString(pattern)))
481 return NULL;
482
483 if ((sz = PyString_Size(pattern)) < 0)
484 return NULL;
485
486 oend = opat + sz;
487 o = opat;
488
Guido van Rossumab28c561996-06-11 18:33:14 +0000489 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000490 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000491 return pattern;
492 }
493
Barry Warsawc3573251996-12-20 21:56:07 +0000494 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
495 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000496 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000497
498 while (o < oend) {
499 if (*o == '(' && escaped == require_escape) {
500 char *backtrack;
501 escaped = 0;
502 ++group_count;
503 *n++ = *o;
504 if (++o >= oend || *o != '<')
505 continue;
506 /* *o == '<' */
507 if (o+1 < oend && *(o+1) == '>')
508 continue;
509 backtrack = o;
510 g = name_buf;
511 for (++o; o < oend;) {
512 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000513 PyObject *group_name = NULL;
514 PyObject *group_index = NULL;
515 *g++ = '\0';
516 group_name = PyString_FromString(name_buf);
517 group_index = PyInt_FromLong(group_count);
518 if (group_name == NULL ||
519 group_index == NULL ||
520 PyDict_SetItem(gdict, group_name,
521 group_index) != 0)
522 {
523 Py_XDECREF(group_name);
524 Py_XDECREF(group_index);
525 Py_XDECREF(npattern);
526 return NULL;
527 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000528 Py_DECREF(group_name);
529 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000530 ++o; /* eat the '>' */
531 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000532 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000533 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000534 o = backtrack;
535 break;
536 }
537 *g++ = *o++;
538 }
539 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000540 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000541 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000542 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000543 *n++ = *o;
544 while (o < oend && *o != ']') {
545 ++o;
546 *n++ = *o;
547 }
548 if (o < oend)
549 ++o;
550 }
551 else if (*o == '\\') {
552 escaped = 1;
553 *n++ = *o;
554 ++o;
555 }
556 else {
557 escaped = 0;
558 *n++ = *o;
559 ++o;
560 }
561 }
562
Barry Warsawc3573251996-12-20 21:56:07 +0000563 if (!(v = PyString_AsString(npattern))) {
564 Py_DECREF(npattern);
565 return NULL;
566 }
567 /* _PyString_Resize() decrements npattern on failure */
568 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000569 return npattern;
570 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000571 return NULL;
572 }
573
574}
575
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000576static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000577regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000578 PyObject *self;
579 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000580{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000581 PyObject *pattern;
582 PyObject *tran = NULL;
583 PyObject *gdict = NULL;
584 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000585 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000586
587 if (!PyArg_ParseTuple(args, "S|S", &pattern, &tran))
588 return NULL;
589
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000590 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000591 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000592 Py_DECREF(gdict);
593 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000594 return NULL;
595 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000596 retval = newregexobject(npattern, tran, pattern, gdict);
597 Py_DECREF(npattern);
598 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000599}
600
601
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000602static PyObject *cache_pat;
603static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000604
605static int
606update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000607 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000608{
Barry Warsawc3573251996-12-20 21:56:07 +0000609 PyObject *tuple = Py_BuildValue("(O)", pat);
610 int status = 0;
611
612 if (!tuple)
613 return -1;
614
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000615 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000616 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000617 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000618 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000619 cache_prog = regex_compile((PyObject *)NULL, tuple);
620 if (cache_prog == NULL) {
621 status = -1;
622 goto finally;
623 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000624 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000625 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000626 }
Barry Warsawc3573251996-12-20 21:56:07 +0000627 finally:
628 Py_DECREF(tuple);
629 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000630}
631
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000632static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000633regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000634 PyObject *self;
635 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000636{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000637 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000638 PyObject *tuple, *v;
639
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000640 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000641 return NULL;
642 if (update_cache(pat) < 0)
643 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000644
645 if (!(tuple = Py_BuildValue("(S)", string)))
646 return NULL;
647 v = regobj_match((regexobject *)cache_prog, tuple);
648 Py_DECREF(tuple);
649 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000650}
651
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000652static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000653regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000654 PyObject *self;
655 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000656{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000657 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000658 PyObject *tuple, *v;
659
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000660 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000661 return NULL;
662 if (update_cache(pat) < 0)
663 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000664
665 if (!(tuple = Py_BuildValue("(S)", string)))
666 return NULL;
667 v = regobj_search((regexobject *)cache_prog, tuple);
668 Py_DECREF(tuple);
669 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000670}
671
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000672static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000673regex_set_syntax(self, args)
Barry Warsawc3573251996-12-20 21:56:07 +0000674 PyObject *self;
675 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000676{
677 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000678 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000679 return NULL;
680 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000681 /* wipe the global pattern cache */
682 Py_XDECREF(cache_pat);
683 cache_pat = NULL;
684 Py_XDECREF(cache_prog);
685 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000686 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000687}
688
Barry Warsaw909d7c31997-02-18 18:48:50 +0000689static PyObject *
690regex_get_syntax(self, args)
691 PyObject *self;
692 PyObject *args;
693{
694 if (!PyArg_Parse(args, ""))
695 return NULL;
696 return PyInt_FromLong((long)re_syntax);
697}
698
699
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000700static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000701 {"compile", regex_compile, 1},
702 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000703 {"match", regex_match, 0},
704 {"search", regex_search, 0},
705 {"set_syntax", regex_set_syntax, 0},
Barry Warsaw909d7c31997-02-18 18:48:50 +0000706 {"get_syntax", regex_get_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000707 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000708};
709
Guido van Rossum8f3032d1996-08-19 22:03:12 +0000710void
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000711initregex()
712{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000713 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000714 int i;
715 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000716
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000717 m = Py_InitModule("regex", regex_global_methods);
718 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000719
720 /* Initialize regex.error exception */
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000721 v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
Barry Warsawc3573251996-12-20 21:56:07 +0000722 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
723 goto finally;
724
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000725 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000726 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
727 goto finally;
728
729 if (!(s = PyString_AsString(v)))
730 goto finally;
731
732 for (i = 0; i < 256; i++) {
733 if (isupper(i))
734 s[i] = tolower(i);
735 else
736 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000737 }
Barry Warsawc3573251996-12-20 21:56:07 +0000738 if (PyDict_SetItemString(d, "casefold", v) < 0)
739 goto finally;
740 Py_DECREF(v);
741
742 if (!PyErr_Occurred())
743 return;
744 finally:
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000745 /* Nothing */ ;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000746}