blob: 92d3b71cacadfb5fad35d10f433b17742bd7efb8 [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
36
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000040
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042
Guido van Rossuma376cc51996-12-05 23:43:35 +000043#include <ctype.h>
44
Guido van Rossum1cab95c1992-01-19 16:31:57 +000045#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000046
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000047static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048
49typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000050 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000051 struct re_pattern_buffer re_patbuf; /* The compiled expression */
52 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000053 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000054 PyObject *re_translate; /* String object for translate table */
55 PyObject *re_lastok; /* String object last matched/searched */
56 PyObject *re_groupindex; /* Group name to index dictionary */
57 PyObject *re_givenpat; /* Pattern with symbolic groups */
58 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000059} regexobject;
60
61/* Regex object methods */
62
63static void
64reg_dealloc(re)
65 regexobject *re;
66{
Guido van Rossumb18618d2000-05-03 23:44:39 +000067 if (re->re_patbuf.buffer)
68 PyMem_DEL(re->re_patbuf.buffer);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000069 Py_XDECREF(re->re_translate);
70 Py_XDECREF(re->re_lastok);
71 Py_XDECREF(re->re_groupindex);
72 Py_XDECREF(re->re_givenpat);
73 Py_XDECREF(re->re_realpat);
Guido van Rossumb18618d2000-05-03 23:44:39 +000074 PyObject_Del(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000075}
76
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000077static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000078makeresult(regs)
79 struct re_registers *regs;
80{
Guido van Rossumc1962021996-10-08 14:18:42 +000081 PyObject *v;
82 int i;
83 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000084
Guido van Rossumc1962021996-10-08 14:18:42 +000085 if (filler == NULL) {
86 filler = Py_BuildValue("(ii)", -1, -1);
87 if (filler == NULL)
88 return NULL;
89 }
90 v = PyTuple_New(RE_NREGS);
91 if (v == NULL)
92 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000093
Guido van Rossumc1962021996-10-08 14:18:42 +000094 for (i = 0; i < RE_NREGS; i++) {
95 int lo = regs->start[i];
96 int hi = regs->end[i];
97 PyObject *w;
98 if (lo == -1 && hi == -1) {
99 w = filler;
100 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000101 }
Guido van Rossumc1962021996-10-08 14:18:42 +0000102 else
103 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +0000104 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
105 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +0000106 return NULL;
107 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000108 }
109 return v;
110}
111
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000112static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000113regobj_match(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000114 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000115 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000116{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000117 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000118 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000119 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000120 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000121 int result;
Barry Warsawc3573251996-12-20 21:56:07 +0000122
Guido van Rossum43713e52000-02-29 13:59:29 +0000123 if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +0000124 return NULL;
Guido van Rossum7e488981998-10-08 02:25:24 +0000125 if (!PyArg_Parse(argstring, "t#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000126 return NULL;
127
Guido van Rossum36d330b1993-02-21 20:12:16 +0000128 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000129 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000130 return NULL;
131 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000132 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000133 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000134 result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
Guido van Rossum0318bd61997-08-14 14:35:12 +0000135 &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000136 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000137 /* Serious failure of some sort; if re_match didn't
138 set an exception, raise a generic error */
139 if (!PyErr_Occurred())
140 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000141 return NULL;
142 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000143 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000144 Py_INCREF(argstring);
145 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000146 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000147 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000148}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000149
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000150static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000151regobj_search(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000152 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000153 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000154{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000155 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000156 char *buffer;
157 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000158 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000159 int range;
160 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000161
Guido van Rossum43713e52000-02-29 13:59:29 +0000162 if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +0000163 return NULL;
Guido van Rossum43713e52000-02-29 13:59:29 +0000164 if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000165 return NULL;
166
Guido van Rossum36d330b1993-02-21 20:12:16 +0000167 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000168 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000169 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000170 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000171 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
172 the implementation don't match: the documentation states that
173 |range| positions are tried, while the code tries |range|+1
174 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000175 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000176 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000177 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000178 result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000179 &re->re_regs);
180 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000181 /* Serious failure of some sort; if re_match didn't
182 set an exception, raise a generic error */
183 if (!PyErr_Occurred())
184 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000185 return NULL;
186 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000187 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000188 Py_INCREF(argstring);
189 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000190 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000191 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000192}
193
Barry Warsawc3573251996-12-20 21:56:07 +0000194/* get the group from the regex where index can be a string (group name) or
195 an integer index [0 .. 99]
196 */
197static PyObject*
198group_from_index(re, index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000199 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000200 PyObject *index;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000201{
202 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000203 char *v;
204
205 if (PyString_Check(index))
206 if (re->re_groupindex == NULL ||
207 !(index = PyDict_GetItem(re->re_groupindex, index)))
208 {
209 PyErr_SetString(RegexError,
210 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000211 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000212 }
Barry Warsawc3573251996-12-20 21:56:07 +0000213
214 i = PyInt_AsLong(index);
215 if (i == -1 && PyErr_Occurred())
216 return NULL;
217
Guido van Rossum36d330b1993-02-21 20:12:16 +0000218 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000219 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000220 return NULL;
221 }
222 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000223 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000224 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000225 return NULL;
226 }
227 a = re->re_regs.start[i];
228 b = re->re_regs.end[i];
229 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000230 Py_INCREF(Py_None);
231 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000232 }
Barry Warsawc3573251996-12-20 21:56:07 +0000233
234 if (!(v = PyString_AsString(re->re_lastok)))
235 return NULL;
236
237 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000238}
239
Barry Warsawc3573251996-12-20 21:56:07 +0000240
241static PyObject *
242regobj_group(re, args)
243 regexobject *re;
244 PyObject *args;
245{
246 int n = PyTuple_Size(args);
247 int i;
248 PyObject *res = NULL;
249
250 if (n < 0)
251 return NULL;
252 if (n == 0) {
253 PyErr_SetString(PyExc_TypeError, "not enough arguments");
254 return NULL;
255 }
256 if (n == 1) {
257 /* return value is a single string */
258 PyObject *index = PyTuple_GetItem(args, 0);
259 if (!index)
260 return NULL;
261
262 return group_from_index(re, index);
263 }
264
265 /* return value is a tuple */
266 if (!(res = PyTuple_New(n)))
267 return NULL;
268
269 for (i = 0; i < n; i++) {
270 PyObject *index = PyTuple_GetItem(args, i);
271 PyObject *group = NULL;
272
273 if (!index)
274 goto finally;
275 if (!(group = group_from_index(re, index)))
276 goto finally;
277 if (PyTuple_SetItem(res, i, group) < 0)
278 goto finally;
279 }
280 return res;
281
282 finally:
283 Py_DECREF(res);
284 return NULL;
285}
286
287
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000288static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000289 {"match", (PyCFunction)regobj_match, 1},
290 {"search", (PyCFunction)regobj_search, 1},
291 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000292 {NULL, NULL} /* sentinel */
293};
294
Barry Warsawc3573251996-12-20 21:56:07 +0000295
296
/* NULL-terminated list of data attribute names, reported through the
   __members__ attribute of regex objects. */
static char *members[] = {
    "last",
    "regs",
    "translate",
    "groupindex",
    "realpat",
    "givenpat",
    NULL    /* sentinel */
};
302
303
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000304static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000305regobj_getattr(re, name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000306 regexobject *re;
307 char *name;
308{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000309 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000310 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000311 Py_INCREF(Py_None);
312 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000313 }
314 return makeresult(&re->re_regs);
315 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000316 if (strcmp(name, "last") == 0) {
317 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000318 Py_INCREF(Py_None);
319 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000320 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000321 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000322 return re->re_lastok;
323 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000324 if (strcmp(name, "translate") == 0) {
325 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000326 Py_INCREF(Py_None);
327 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000328 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000329 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000330 return re->re_translate;
331 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000332 if (strcmp(name, "groupindex") == 0) {
333 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000334 Py_INCREF(Py_None);
335 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000336 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000337 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000338 return re->re_groupindex;
339 }
340 if (strcmp(name, "realpat") == 0) {
341 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000342 Py_INCREF(Py_None);
343 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000344 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000345 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000346 return re->re_realpat;
347 }
348 if (strcmp(name, "givenpat") == 0) {
349 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000350 Py_INCREF(Py_None);
351 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000352 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000353 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000354 return re->re_givenpat;
355 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000356 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000357 int i = 0;
358 PyObject *list = NULL;
359
360 /* okay, so it's unlikely this list will change that often.
361 still, it's easier to change it in just one place.
362 */
363 while (members[i])
364 i++;
365 if (!(list = PyList_New(i)))
366 return NULL;
367
368 i = 0;
369 while (members[i]) {
370 PyObject* v = PyString_FromString(members[i]);
371 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000372 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000373 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000374 }
Barry Warsawc3573251996-12-20 21:56:07 +0000375 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000376 }
377 return list;
378 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000379 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000380}
381
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000382static PyTypeObject Regextype = {
383 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000384 0, /*ob_size*/
385 "regex", /*tp_name*/
386 sizeof(regexobject), /*tp_size*/
387 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000388 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000389 (destructor)reg_dealloc, /*tp_dealloc*/
390 0, /*tp_print*/
391 (getattrfunc)regobj_getattr, /*tp_getattr*/
392 0, /*tp_setattr*/
393 0, /*tp_compare*/
394 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000395};
396
Barry Warsawc3573251996-12-20 21:56:07 +0000397/* reference counting invariants:
398 pattern: borrowed
399 translate: borrowed
400 givenpat: borrowed
401 groupindex: transferred
402*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000403static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000404newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000405 PyObject *pattern;
406 PyObject *translate;
407 PyObject *givenpat;
408 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000409{
410 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000411 char *pat;
412 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000413
Guido van Rossum7e488981998-10-08 02:25:24 +0000414 if (!PyArg_Parse(pattern, "t#", &pat, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000415 return NULL;
416
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000417 if (translate != NULL && PyString_Size(translate) != 256) {
418 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000419 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000420 return NULL;
421 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000422 re = PyObject_New(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000423 if (re != NULL) {
424 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000425 re->re_patbuf.buffer = NULL;
426 re->re_patbuf.allocated = 0;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000427 re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000428 if (translate) {
Guido van Rossumed2554a1997-08-18 15:31:24 +0000429 re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000430 if (!re->re_patbuf.translate)
431 goto finally;
432 Py_INCREF(translate);
433 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000434 else
435 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000436 re->re_translate = translate;
437 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000438 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000439 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000440 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000441 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000442 re->re_givenpat = givenpat;
Guido van Rossumd19c04a1997-09-03 00:47:36 +0000443 error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000444 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000445 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000446 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000447 }
448 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000449 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000450 finally:
451 Py_DECREF(re);
452 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000453}
454
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000455static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000456regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000457 PyObject *self;
458 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000459{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000460 PyObject *pat = NULL;
461 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000462
Guido van Rossum43713e52000-02-29 13:59:29 +0000463 if (!PyArg_ParseTuple(args, "S|S:compile", &pat, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000464 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000465 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000466}
467
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000468static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000469symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000470 PyObject *pattern;
471 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000472{
Barry Warsawc3573251996-12-20 21:56:07 +0000473 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000474 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000475 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000476 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000477 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000478 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000479 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
480
Barry Warsawc3573251996-12-20 21:56:07 +0000481 if (!(opat = PyString_AsString(pattern)))
482 return NULL;
483
484 if ((sz = PyString_Size(pattern)) < 0)
485 return NULL;
486
487 oend = opat + sz;
488 o = opat;
489
Guido van Rossumab28c561996-06-11 18:33:14 +0000490 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000491 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000492 return pattern;
493 }
494
Barry Warsawc3573251996-12-20 21:56:07 +0000495 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
496 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000497 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000498
499 while (o < oend) {
500 if (*o == '(' && escaped == require_escape) {
501 char *backtrack;
502 escaped = 0;
503 ++group_count;
504 *n++ = *o;
505 if (++o >= oend || *o != '<')
506 continue;
507 /* *o == '<' */
508 if (o+1 < oend && *(o+1) == '>')
509 continue;
510 backtrack = o;
511 g = name_buf;
512 for (++o; o < oend;) {
513 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000514 PyObject *group_name = NULL;
515 PyObject *group_index = NULL;
516 *g++ = '\0';
517 group_name = PyString_FromString(name_buf);
518 group_index = PyInt_FromLong(group_count);
519 if (group_name == NULL ||
520 group_index == NULL ||
521 PyDict_SetItem(gdict, group_name,
522 group_index) != 0)
523 {
524 Py_XDECREF(group_name);
525 Py_XDECREF(group_index);
526 Py_XDECREF(npattern);
527 return NULL;
528 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000529 Py_DECREF(group_name);
530 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000531 ++o; /* eat the '>' */
532 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000533 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000534 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000535 o = backtrack;
536 break;
537 }
538 *g++ = *o++;
539 }
540 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000541 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000542 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000543 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000544 *n++ = *o;
545 while (o < oend && *o != ']') {
546 ++o;
547 *n++ = *o;
548 }
549 if (o < oend)
550 ++o;
551 }
552 else if (*o == '\\') {
553 escaped = 1;
554 *n++ = *o;
555 ++o;
556 }
557 else {
558 escaped = 0;
559 *n++ = *o;
560 ++o;
561 }
562 }
563
Barry Warsawc3573251996-12-20 21:56:07 +0000564 if (!(v = PyString_AsString(npattern))) {
565 Py_DECREF(npattern);
566 return NULL;
567 }
568 /* _PyString_Resize() decrements npattern on failure */
569 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000570 return npattern;
571 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000572 return NULL;
573 }
574
575}
576
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000577static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000578regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000579 PyObject *self;
580 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000581{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000582 PyObject *pattern;
583 PyObject *tran = NULL;
584 PyObject *gdict = NULL;
585 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000586 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000587
Guido van Rossum43713e52000-02-29 13:59:29 +0000588 if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000589 return NULL;
590
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000591 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000592 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000593 Py_DECREF(gdict);
594 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000595 return NULL;
596 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000597 retval = newregexobject(npattern, tran, pattern, gdict);
598 Py_DECREF(npattern);
599 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000600}
601
602
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000603static PyObject *cache_pat;
604static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000605
606static int
607update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000608 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000609{
Barry Warsawc3573251996-12-20 21:56:07 +0000610 PyObject *tuple = Py_BuildValue("(O)", pat);
611 int status = 0;
612
613 if (!tuple)
614 return -1;
615
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000616 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000617 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000618 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000619 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000620 cache_prog = regex_compile((PyObject *)NULL, tuple);
621 if (cache_prog == NULL) {
622 status = -1;
623 goto finally;
624 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000625 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000626 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000627 }
Barry Warsawc3573251996-12-20 21:56:07 +0000628 finally:
629 Py_DECREF(tuple);
630 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000631}
632
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000633static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000634regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000635 PyObject *self;
636 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000637{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000638 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000639 PyObject *tuple, *v;
640
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000641 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000642 return NULL;
643 if (update_cache(pat) < 0)
644 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000645
646 if (!(tuple = Py_BuildValue("(S)", string)))
647 return NULL;
648 v = regobj_match((regexobject *)cache_prog, tuple);
649 Py_DECREF(tuple);
650 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000651}
652
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000653static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000654regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000655 PyObject *self;
656 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000657{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000658 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000659 PyObject *tuple, *v;
660
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000661 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000662 return NULL;
663 if (update_cache(pat) < 0)
664 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000665
666 if (!(tuple = Py_BuildValue("(S)", string)))
667 return NULL;
668 v = regobj_search((regexobject *)cache_prog, tuple);
669 Py_DECREF(tuple);
670 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000671}
672
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000673static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000674regex_set_syntax(self, args)
Barry Warsawc3573251996-12-20 21:56:07 +0000675 PyObject *self;
676 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000677{
678 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000679 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000680 return NULL;
681 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000682 /* wipe the global pattern cache */
683 Py_XDECREF(cache_pat);
684 cache_pat = NULL;
685 Py_XDECREF(cache_prog);
686 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000687 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000688}
689
Barry Warsaw909d7c31997-02-18 18:48:50 +0000690static PyObject *
691regex_get_syntax(self, args)
692 PyObject *self;
693 PyObject *args;
694{
695 if (!PyArg_Parse(args, ""))
696 return NULL;
697 return PyInt_FromLong((long)re_syntax);
698}
699
700
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000701static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000702 {"compile", regex_compile, 1},
703 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000704 {"match", regex_match, 0},
705 {"search", regex_search, 0},
706 {"set_syntax", regex_set_syntax, 0},
Barry Warsaw909d7c31997-02-18 18:48:50 +0000707 {"get_syntax", regex_get_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000708 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000709};
710
Guido van Rossum3886bb61998-12-04 18:50:17 +0000711DL_EXPORT(void)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000712initregex()
713{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000714 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000715 int i;
716 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000717
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000718 m = Py_InitModule("regex", regex_global_methods);
719 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000720
721 /* Initialize regex.error exception */
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000722 v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
Barry Warsawc3573251996-12-20 21:56:07 +0000723 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
724 goto finally;
725
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000726 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000727 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
728 goto finally;
729
730 if (!(s = PyString_AsString(v)))
731 goto finally;
732
733 for (i = 0; i < 256; i++) {
734 if (isupper(i))
735 s[i] = tolower(i);
736 else
737 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000738 }
Barry Warsawc3573251996-12-20 21:56:07 +0000739 if (PyDict_SetItemString(d, "casefold", v) < 0)
740 goto finally;
741 Py_DECREF(v);
742
743 if (!PyErr_Occurred())
744 return;
745 finally:
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000746 /* Nothing */ ;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000747}