blob: 32360f24960f7a9935c4ec5038b2acf488522bd1 [file] [log] [blame]
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00001/*
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00002XXX support range parameter on search
3XXX support mstop parameter on search
4*/
5
6/***********************************************************
Guido van Rossum524b5881995-01-04 19:10:35 +00007Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
8The Netherlands.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00009
10 All Rights Reserved
11
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Permission to use, copy, modify, and distribute this software and its
13documentation for any purpose and without fee is hereby granted,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000014provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000015both that copyright notice and this permission notice appear in
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000016supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000017Centrum or CWI or Corporation for National Research Initiatives or
18CNRI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior
20permission.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021
Guido van Rossumd266eb41996-10-25 14:44:06 +000022While CWI is the initial source for this software, a modified version
23is made available by the Corporation for National Research Initiatives
24(CNRI) at the Internet address ftp://ftp.python.org.
25
26STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
27REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
28MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
29CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
30DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
31PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
32TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
33PERFORMANCE OF THIS SOFTWARE.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35******************************************************************/
36
37/* Regular expression objects */
Guido van Rossum1cab95c1992-01-19 16:31:57 +000038/* This uses Tatu Ylonen's copyleft-free reimplementation of
39 GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000040
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042
Guido van Rossuma376cc51996-12-05 23:43:35 +000043#include <ctype.h>
44
Guido van Rossum1cab95c1992-01-19 16:31:57 +000045#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000046
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000047static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048
49typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000050 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000051 struct re_pattern_buffer re_patbuf; /* The compiled expression */
52 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000053 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000054 PyObject *re_translate; /* String object for translate table */
55 PyObject *re_lastok; /* String object last matched/searched */
56 PyObject *re_groupindex; /* Group name to index dictionary */
57 PyObject *re_givenpat; /* Pattern with symbolic groups */
58 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000059} regexobject;
60
61/* Regex object methods */
62
63static void
64reg_dealloc(re)
65 regexobject *re;
66{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000067 PyMem_XDEL(re->re_patbuf.buffer);
68 Py_XDECREF(re->re_translate);
69 Py_XDECREF(re->re_lastok);
70 Py_XDECREF(re->re_groupindex);
71 Py_XDECREF(re->re_givenpat);
72 Py_XDECREF(re->re_realpat);
73 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000074}
75
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000076static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000077makeresult(regs)
78 struct re_registers *regs;
79{
Guido van Rossumc1962021996-10-08 14:18:42 +000080 PyObject *v;
81 int i;
82 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000083
Guido van Rossumc1962021996-10-08 14:18:42 +000084 if (filler == NULL) {
85 filler = Py_BuildValue("(ii)", -1, -1);
86 if (filler == NULL)
87 return NULL;
88 }
89 v = PyTuple_New(RE_NREGS);
90 if (v == NULL)
91 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000092
Guido van Rossumc1962021996-10-08 14:18:42 +000093 for (i = 0; i < RE_NREGS; i++) {
94 int lo = regs->start[i];
95 int hi = regs->end[i];
96 PyObject *w;
97 if (lo == -1 && hi == -1) {
98 w = filler;
99 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000100 }
Guido van Rossumc1962021996-10-08 14:18:42 +0000101 else
102 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +0000103 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
104 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +0000105 return NULL;
106 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000107 }
108 return v;
109}
110
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000111static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000112regobj_match(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000113 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000114 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000115{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000116 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000117 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000118 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000119 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000120 int result;
Barry Warsawc3573251996-12-20 21:56:07 +0000121
Guido van Rossum4a807f51997-05-12 16:04:09 +0000122 if (!PyArg_ParseTuple(args, "O|i", &argstring, &offset))
123 return NULL;
124 if (!PyArg_Parse(argstring, "s#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000125 return NULL;
126
Guido van Rossum36d330b1993-02-21 20:12:16 +0000127 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000128 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000129 return NULL;
130 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000131 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000132 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000133 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000134 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000135 /* Serious failure of some sort; if re_match didn't
136 set an exception, raise a generic error */
137 if (!PyErr_Occurred())
138 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000139 return NULL;
140 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000141 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000142 Py_INCREF(argstring);
143 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000144 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000145 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000146}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000147
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000148static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000149regobj_search(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000150 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000151 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000152{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000153 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000154 char *buffer;
155 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000156 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000157 int range;
158 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000159
Guido van Rossum4a807f51997-05-12 16:04:09 +0000160 if (!PyArg_ParseTuple(args, "O|i", &argstring, &offset))
161 return NULL;
162 if (!PyArg_Parse(argstring, "s#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000163 return NULL;
164
Guido van Rossum36d330b1993-02-21 20:12:16 +0000165 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000166 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000167 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000168 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000169 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
170 the implementation don't match: the documentation states that
171 |range| positions are tried, while the code tries |range|+1
172 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000173 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000174 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000175 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000176 result = re_search(&re->re_patbuf, buffer, size, offset, range,
177 &re->re_regs);
178 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000179 /* Serious failure of some sort; if re_match didn't
180 set an exception, raise a generic error */
181 if (!PyErr_Occurred())
182 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000183 return NULL;
184 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000185 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000186 Py_INCREF(argstring);
187 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000188 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000189 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000190}
191
Barry Warsawc3573251996-12-20 21:56:07 +0000192/* get the group from the regex where index can be a string (group name) or
193 an integer index [0 .. 99]
194 */
195static PyObject*
196group_from_index(re, index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000197 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000198 PyObject *index;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000199{
200 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000201 char *v;
202
203 if (PyString_Check(index))
204 if (re->re_groupindex == NULL ||
205 !(index = PyDict_GetItem(re->re_groupindex, index)))
206 {
207 PyErr_SetString(RegexError,
208 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000209 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000210 }
Barry Warsawc3573251996-12-20 21:56:07 +0000211
212 i = PyInt_AsLong(index);
213 if (i == -1 && PyErr_Occurred())
214 return NULL;
215
Guido van Rossum36d330b1993-02-21 20:12:16 +0000216 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000217 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000218 return NULL;
219 }
220 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000221 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000222 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000223 return NULL;
224 }
225 a = re->re_regs.start[i];
226 b = re->re_regs.end[i];
227 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000228 Py_INCREF(Py_None);
229 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000230 }
Barry Warsawc3573251996-12-20 21:56:07 +0000231
232 if (!(v = PyString_AsString(re->re_lastok)))
233 return NULL;
234
235 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000236}
237
Barry Warsawc3573251996-12-20 21:56:07 +0000238
239static PyObject *
240regobj_group(re, args)
241 regexobject *re;
242 PyObject *args;
243{
244 int n = PyTuple_Size(args);
245 int i;
246 PyObject *res = NULL;
247
248 if (n < 0)
249 return NULL;
250 if (n == 0) {
251 PyErr_SetString(PyExc_TypeError, "not enough arguments");
252 return NULL;
253 }
254 if (n == 1) {
255 /* return value is a single string */
256 PyObject *index = PyTuple_GetItem(args, 0);
257 if (!index)
258 return NULL;
259
260 return group_from_index(re, index);
261 }
262
263 /* return value is a tuple */
264 if (!(res = PyTuple_New(n)))
265 return NULL;
266
267 for (i = 0; i < n; i++) {
268 PyObject *index = PyTuple_GetItem(args, i);
269 PyObject *group = NULL;
270
271 if (!index)
272 goto finally;
273 if (!(group = group_from_index(re, index)))
274 goto finally;
275 if (PyTuple_SetItem(res, i, group) < 0)
276 goto finally;
277 }
278 return res;
279
280 finally:
281 Py_DECREF(res);
282 return NULL;
283}
284
285
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000286static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000287 {"match", (PyCFunction)regobj_match, 1},
288 {"search", (PyCFunction)regobj_search, 1},
289 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000290 {NULL, NULL} /* sentinel */
291};
292
Barry Warsawc3573251996-12-20 21:56:07 +0000293
294
/* Attribute names listed by the "__members__" attribute of regex
   objects; the list is terminated by a NULL entry. */
static char *members[] = {
    "last",
    "regs",
    "translate",
    "groupindex",
    "realpat",
    "givenpat",
    NULL
};
300
301
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000302static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000303regobj_getattr(re, name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000304 regexobject *re;
305 char *name;
306{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000307 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000308 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000309 Py_INCREF(Py_None);
310 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000311 }
312 return makeresult(&re->re_regs);
313 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000314 if (strcmp(name, "last") == 0) {
315 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000316 Py_INCREF(Py_None);
317 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000318 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000319 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000320 return re->re_lastok;
321 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000322 if (strcmp(name, "translate") == 0) {
323 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000324 Py_INCREF(Py_None);
325 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000326 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000327 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000328 return re->re_translate;
329 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000330 if (strcmp(name, "groupindex") == 0) {
331 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000332 Py_INCREF(Py_None);
333 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000334 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000335 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000336 return re->re_groupindex;
337 }
338 if (strcmp(name, "realpat") == 0) {
339 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000340 Py_INCREF(Py_None);
341 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000342 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000343 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000344 return re->re_realpat;
345 }
346 if (strcmp(name, "givenpat") == 0) {
347 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000348 Py_INCREF(Py_None);
349 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000350 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000351 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000352 return re->re_givenpat;
353 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000354 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000355 int i = 0;
356 PyObject *list = NULL;
357
358 /* okay, so it's unlikely this list will change that often.
359 still, it's easier to change it in just one place.
360 */
361 while (members[i])
362 i++;
363 if (!(list = PyList_New(i)))
364 return NULL;
365
366 i = 0;
367 while (members[i]) {
368 PyObject* v = PyString_FromString(members[i]);
369 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000370 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000371 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000372 }
Barry Warsawc3573251996-12-20 21:56:07 +0000373 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000374 }
375 return list;
376 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000377 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000378}
379
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000380static PyTypeObject Regextype = {
381 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000382 0, /*ob_size*/
383 "regex", /*tp_name*/
384 sizeof(regexobject), /*tp_size*/
385 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000386 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000387 (destructor)reg_dealloc, /*tp_dealloc*/
388 0, /*tp_print*/
389 (getattrfunc)regobj_getattr, /*tp_getattr*/
390 0, /*tp_setattr*/
391 0, /*tp_compare*/
392 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000393};
394
Barry Warsawc3573251996-12-20 21:56:07 +0000395/* reference counting invariants:
396 pattern: borrowed
397 translate: borrowed
398 givenpat: borrowed
399 groupindex: transferred
400*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000401static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000402newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000403 PyObject *pattern;
404 PyObject *translate;
405 PyObject *givenpat;
406 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000407{
408 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000409 char *pat;
410 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000411
Barry Warsawc3573251996-12-20 21:56:07 +0000412 if (!PyArg_Parse(pattern, "s#", &pat, &size))
413 return NULL;
414
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000415 if (translate != NULL && PyString_Size(translate) != 256) {
416 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000417 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000418 return NULL;
419 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000420 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000421 if (re != NULL) {
422 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000423 re->re_patbuf.buffer = NULL;
424 re->re_patbuf.allocated = 0;
425 re->re_patbuf.fastmap = re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000426 if (translate) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000427 re->re_patbuf.translate = PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000428 if (!re->re_patbuf.translate)
429 goto finally;
430 Py_INCREF(translate);
431 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000432 else
433 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000434 re->re_translate = translate;
435 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000436 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000437 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000438 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000439 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000440 re->re_givenpat = givenpat;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000441 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000442 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000443 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000444 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000445 }
446 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000447 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000448 finally:
449 Py_DECREF(re);
450 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000451}
452
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000453static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000454regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000455 PyObject *self;
456 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000457{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000458 PyObject *pat = NULL;
459 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000460
461 if (!PyArg_ParseTuple(args, "S|S", &pat, &tran))
462 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000463 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000464}
465
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000466static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000467symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000468 PyObject *pattern;
469 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000470{
Barry Warsawc3573251996-12-20 21:56:07 +0000471 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000472 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000473 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000474 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000475 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000476 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000477 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
478
Barry Warsawc3573251996-12-20 21:56:07 +0000479 if (!(opat = PyString_AsString(pattern)))
480 return NULL;
481
482 if ((sz = PyString_Size(pattern)) < 0)
483 return NULL;
484
485 oend = opat + sz;
486 o = opat;
487
Guido van Rossumab28c561996-06-11 18:33:14 +0000488 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000489 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000490 return pattern;
491 }
492
Barry Warsawc3573251996-12-20 21:56:07 +0000493 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
494 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000495 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000496
497 while (o < oend) {
498 if (*o == '(' && escaped == require_escape) {
499 char *backtrack;
500 escaped = 0;
501 ++group_count;
502 *n++ = *o;
503 if (++o >= oend || *o != '<')
504 continue;
505 /* *o == '<' */
506 if (o+1 < oend && *(o+1) == '>')
507 continue;
508 backtrack = o;
509 g = name_buf;
510 for (++o; o < oend;) {
511 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000512 PyObject *group_name = NULL;
513 PyObject *group_index = NULL;
514 *g++ = '\0';
515 group_name = PyString_FromString(name_buf);
516 group_index = PyInt_FromLong(group_count);
517 if (group_name == NULL ||
518 group_index == NULL ||
519 PyDict_SetItem(gdict, group_name,
520 group_index) != 0)
521 {
522 Py_XDECREF(group_name);
523 Py_XDECREF(group_index);
524 Py_XDECREF(npattern);
525 return NULL;
526 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000527 Py_DECREF(group_name);
528 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000529 ++o; /* eat the '>' */
530 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000531 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000532 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000533 o = backtrack;
534 break;
535 }
536 *g++ = *o++;
537 }
538 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000539 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000540 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000541 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000542 *n++ = *o;
543 while (o < oend && *o != ']') {
544 ++o;
545 *n++ = *o;
546 }
547 if (o < oend)
548 ++o;
549 }
550 else if (*o == '\\') {
551 escaped = 1;
552 *n++ = *o;
553 ++o;
554 }
555 else {
556 escaped = 0;
557 *n++ = *o;
558 ++o;
559 }
560 }
561
Barry Warsawc3573251996-12-20 21:56:07 +0000562 if (!(v = PyString_AsString(npattern))) {
563 Py_DECREF(npattern);
564 return NULL;
565 }
566 /* _PyString_Resize() decrements npattern on failure */
567 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000568 return npattern;
569 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000570 return NULL;
571 }
572
573}
574
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000575static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000576regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000577 PyObject *self;
578 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000579{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000580 PyObject *pattern;
581 PyObject *tran = NULL;
582 PyObject *gdict = NULL;
583 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000584 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000585
586 if (!PyArg_ParseTuple(args, "S|S", &pattern, &tran))
587 return NULL;
588
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000589 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000590 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000591 Py_DECREF(gdict);
592 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000593 return NULL;
594 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000595 retval = newregexobject(npattern, tran, pattern, gdict);
596 Py_DECREF(npattern);
597 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000598}
599
600
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000601static PyObject *cache_pat;
602static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000603
604static int
605update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000606 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000607{
Barry Warsawc3573251996-12-20 21:56:07 +0000608 PyObject *tuple = Py_BuildValue("(O)", pat);
609 int status = 0;
610
611 if (!tuple)
612 return -1;
613
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000614 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000615 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000616 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000617 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000618 cache_prog = regex_compile((PyObject *)NULL, tuple);
619 if (cache_prog == NULL) {
620 status = -1;
621 goto finally;
622 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000623 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000624 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000625 }
Barry Warsawc3573251996-12-20 21:56:07 +0000626 finally:
627 Py_DECREF(tuple);
628 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000629}
630
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000631static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000632regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000633 PyObject *self;
634 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000635{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000636 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000637 PyObject *tuple, *v;
638
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000639 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000640 return NULL;
641 if (update_cache(pat) < 0)
642 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000643
644 if (!(tuple = Py_BuildValue("(S)", string)))
645 return NULL;
646 v = regobj_match((regexobject *)cache_prog, tuple);
647 Py_DECREF(tuple);
648 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000649}
650
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000651static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000652regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000653 PyObject *self;
654 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000655{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000656 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000657 PyObject *tuple, *v;
658
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000659 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000660 return NULL;
661 if (update_cache(pat) < 0)
662 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000663
664 if (!(tuple = Py_BuildValue("(S)", string)))
665 return NULL;
666 v = regobj_search((regexobject *)cache_prog, tuple);
667 Py_DECREF(tuple);
668 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000669}
670
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000671static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000672regex_set_syntax(self, args)
Barry Warsawc3573251996-12-20 21:56:07 +0000673 PyObject *self;
674 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000675{
676 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000677 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000678 return NULL;
679 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000680 /* wipe the global pattern cache */
681 Py_XDECREF(cache_pat);
682 cache_pat = NULL;
683 Py_XDECREF(cache_prog);
684 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000685 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000686}
687
Barry Warsaw909d7c31997-02-18 18:48:50 +0000688static PyObject *
689regex_get_syntax(self, args)
690 PyObject *self;
691 PyObject *args;
692{
693 if (!PyArg_Parse(args, ""))
694 return NULL;
695 return PyInt_FromLong((long)re_syntax);
696}
697
698
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000699static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000700 {"compile", regex_compile, 1},
701 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000702 {"match", regex_match, 0},
703 {"search", regex_search, 0},
704 {"set_syntax", regex_set_syntax, 0},
Barry Warsaw909d7c31997-02-18 18:48:50 +0000705 {"get_syntax", regex_get_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000706 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000707};
708
Guido van Rossum8f3032d1996-08-19 22:03:12 +0000709void
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000710initregex()
711{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000712 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000713 int i;
714 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000715
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000716 m = Py_InitModule("regex", regex_global_methods);
717 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000718
719 /* Initialize regex.error exception */
Barry Warsawc3573251996-12-20 21:56:07 +0000720 v = RegexError = PyString_FromString("regex.error");
721 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
722 goto finally;
723
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000724 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000725 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
726 goto finally;
727
728 if (!(s = PyString_AsString(v)))
729 goto finally;
730
731 for (i = 0; i < 256; i++) {
732 if (isupper(i))
733 s[i] = tolower(i);
734 else
735 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000736 }
Barry Warsawc3573251996-12-20 21:56:07 +0000737 if (PyDict_SetItemString(d, "casefold", v) < 0)
738 goto finally;
739 Py_DECREF(v);
740
741 if (!PyErr_Occurred())
742 return;
743 finally:
744 Py_FatalError("can't initialize regex module");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000745}