blob: ad860683f6b91e9e78ba3c78298a29a8be147e31 [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
6/***********************************************************
Guido van Rossum524b5881995-01-04 19:10:35 +00007Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
8The Netherlands.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00009
10 All Rights Reserved
11
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Permission to use, copy, modify, and distribute this software and its
13documentation for any purpose and without fee is hereby granted,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000014provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000015both that copyright notice and this permission notice appear in
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000016supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000017Centrum or CWI or Corporation for National Research Initiatives or
18CNRI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior
20permission.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021
Guido van Rossumd266eb41996-10-25 14:44:06 +000022While CWI is the initial source for this software, a modified version
23is made available by the Corporation for National Research Initiatives
24(CNRI) at the Internet address ftp://ftp.python.org.
25
26STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
27REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
28MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
29CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
30DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
31PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
32TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
33PERFORMANCE OF THIS SOFTWARE.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35******************************************************************/
36
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000040
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042
Guido van Rossuma376cc51996-12-05 23:43:35 +000043#include <ctype.h>
44
Guido van Rossum1cab95c1992-01-19 16:31:57 +000045#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000046
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000047static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048
49typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000050 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000051 struct re_pattern_buffer re_patbuf; /* The compiled expression */
52 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000053 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000054 PyObject *re_translate; /* String object for translate table */
55 PyObject *re_lastok; /* String object last matched/searched */
56 PyObject *re_groupindex; /* Group name to index dictionary */
57 PyObject *re_givenpat; /* Pattern with symbolic groups */
58 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000059} regexobject;
60
61/* Regex object methods */
62
63static void
64reg_dealloc(re)
65 regexobject *re;
66{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000067 PyMem_XDEL(re->re_patbuf.buffer);
68 Py_XDECREF(re->re_translate);
69 Py_XDECREF(re->re_lastok);
70 Py_XDECREF(re->re_groupindex);
71 Py_XDECREF(re->re_givenpat);
72 Py_XDECREF(re->re_realpat);
73 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000074}
75
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000076static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000077makeresult(regs)
78 struct re_registers *regs;
79{
Guido van Rossumc1962021996-10-08 14:18:42 +000080 PyObject *v;
81 int i;
82 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000083
Guido van Rossumc1962021996-10-08 14:18:42 +000084 if (filler == NULL) {
85 filler = Py_BuildValue("(ii)", -1, -1);
86 if (filler == NULL)
87 return NULL;
88 }
89 v = PyTuple_New(RE_NREGS);
90 if (v == NULL)
91 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000092
Guido van Rossumc1962021996-10-08 14:18:42 +000093 for (i = 0; i < RE_NREGS; i++) {
94 int lo = regs->start[i];
95 int hi = regs->end[i];
96 PyObject *w;
97 if (lo == -1 && hi == -1) {
98 w = filler;
99 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000100 }
Guido van Rossumc1962021996-10-08 14:18:42 +0000101 else
102 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +0000103 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
104 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +0000105 return NULL;
106 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000107 }
108 return v;
109}
110
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000111static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000112regobj_match(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000113 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000114 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000115{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000116 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000117 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000118 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000119 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000120 int result;
Barry Warsawc3573251996-12-20 21:56:07 +0000121
Guido van Rossum4a807f51997-05-12 16:04:09 +0000122 if (!PyArg_ParseTuple(args, "O|i", &argstring, &offset))
123 return NULL;
124 if (!PyArg_Parse(argstring, "s#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000125 return NULL;
126
Guido van Rossum36d330b1993-02-21 20:12:16 +0000127 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000128 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000129 return NULL;
130 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000131 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000132 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000133 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000134 if (result < -1) {
135 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000136 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000137 return NULL;
138 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000139 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000140 Py_INCREF(argstring);
141 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000142 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000143 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000144}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000145
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000146static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000147regobj_search(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000148 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000149 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000150{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000151 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000152 char *buffer;
153 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000154 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000155 int range;
156 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000157
Guido van Rossum4a807f51997-05-12 16:04:09 +0000158 if (!PyArg_ParseTuple(args, "O|i", &argstring, &offset))
159 return NULL;
160 if (!PyArg_Parse(argstring, "s#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000161 return NULL;
162
Guido van Rossum36d330b1993-02-21 20:12:16 +0000163 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000164 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000165 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000166 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000167 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
168 the implementation don't match: the documentation states that
169 |range| positions are tried, while the code tries |range|+1
170 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000171 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000172 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000173 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000174 result = re_search(&re->re_patbuf, buffer, size, offset, range,
175 &re->re_regs);
176 if (result < -1) {
177 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000178 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000179 return NULL;
180 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000181 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000182 Py_INCREF(argstring);
183 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000184 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000185 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000186}
187
Barry Warsawc3573251996-12-20 21:56:07 +0000188/* get the group from the regex where index can be a string (group name) or
189 an integer index [0 .. 99]
190 */
191static PyObject*
192group_from_index(re, index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000193 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000194 PyObject *index;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000195{
196 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000197 char *v;
198
199 if (PyString_Check(index))
200 if (re->re_groupindex == NULL ||
201 !(index = PyDict_GetItem(re->re_groupindex, index)))
202 {
203 PyErr_SetString(RegexError,
204 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000205 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000206 }
Barry Warsawc3573251996-12-20 21:56:07 +0000207
208 i = PyInt_AsLong(index);
209 if (i == -1 && PyErr_Occurred())
210 return NULL;
211
Guido van Rossum36d330b1993-02-21 20:12:16 +0000212 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000213 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000214 return NULL;
215 }
216 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000217 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000218 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000219 return NULL;
220 }
221 a = re->re_regs.start[i];
222 b = re->re_regs.end[i];
223 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000224 Py_INCREF(Py_None);
225 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000226 }
Barry Warsawc3573251996-12-20 21:56:07 +0000227
228 if (!(v = PyString_AsString(re->re_lastok)))
229 return NULL;
230
231 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000232}
233
Barry Warsawc3573251996-12-20 21:56:07 +0000234
235static PyObject *
236regobj_group(re, args)
237 regexobject *re;
238 PyObject *args;
239{
240 int n = PyTuple_Size(args);
241 int i;
242 PyObject *res = NULL;
243
244 if (n < 0)
245 return NULL;
246 if (n == 0) {
247 PyErr_SetString(PyExc_TypeError, "not enough arguments");
248 return NULL;
249 }
250 if (n == 1) {
251 /* return value is a single string */
252 PyObject *index = PyTuple_GetItem(args, 0);
253 if (!index)
254 return NULL;
255
256 return group_from_index(re, index);
257 }
258
259 /* return value is a tuple */
260 if (!(res = PyTuple_New(n)))
261 return NULL;
262
263 for (i = 0; i < n; i++) {
264 PyObject *index = PyTuple_GetItem(args, i);
265 PyObject *group = NULL;
266
267 if (!index)
268 goto finally;
269 if (!(group = group_from_index(re, index)))
270 goto finally;
271 if (PyTuple_SetItem(res, i, group) < 0)
272 goto finally;
273 }
274 return res;
275
276 finally:
277 Py_DECREF(res);
278 return NULL;
279}
280
281
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000282static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000283 {"match", (PyCFunction)regobj_match, 1},
284 {"search", (PyCFunction)regobj_search, 1},
285 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000286 {NULL, NULL} /* sentinel */
287};
288
Barry Warsawc3573251996-12-20 21:56:07 +0000289
290
291static char* members[] = {
292 "last", "regs", "translate",
293 "groupindex", "realpat", "givenpat",
294 NULL
295};
296
297
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000298static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000299regobj_getattr(re, name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000300 regexobject *re;
301 char *name;
302{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000303 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000304 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000305 Py_INCREF(Py_None);
306 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000307 }
308 return makeresult(&re->re_regs);
309 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000310 if (strcmp(name, "last") == 0) {
311 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000312 Py_INCREF(Py_None);
313 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000314 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000315 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000316 return re->re_lastok;
317 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000318 if (strcmp(name, "translate") == 0) {
319 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000320 Py_INCREF(Py_None);
321 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000322 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000323 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000324 return re->re_translate;
325 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000326 if (strcmp(name, "groupindex") == 0) {
327 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000328 Py_INCREF(Py_None);
329 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000330 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000331 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000332 return re->re_groupindex;
333 }
334 if (strcmp(name, "realpat") == 0) {
335 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000336 Py_INCREF(Py_None);
337 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000338 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000339 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000340 return re->re_realpat;
341 }
342 if (strcmp(name, "givenpat") == 0) {
343 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000344 Py_INCREF(Py_None);
345 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000346 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000347 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000348 return re->re_givenpat;
349 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000350 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000351 int i = 0;
352 PyObject *list = NULL;
353
354 /* okay, so it's unlikely this list will change that often.
355 still, it's easier to change it in just one place.
356 */
357 while (members[i])
358 i++;
359 if (!(list = PyList_New(i)))
360 return NULL;
361
362 i = 0;
363 while (members[i]) {
364 PyObject* v = PyString_FromString(members[i]);
365 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000366 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000367 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000368 }
Barry Warsawc3573251996-12-20 21:56:07 +0000369 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000370 }
371 return list;
372 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000373 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000374}
375
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000376static PyTypeObject Regextype = {
377 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000378 0, /*ob_size*/
379 "regex", /*tp_name*/
380 sizeof(regexobject), /*tp_size*/
381 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000382 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000383 (destructor)reg_dealloc, /*tp_dealloc*/
384 0, /*tp_print*/
385 (getattrfunc)regobj_getattr, /*tp_getattr*/
386 0, /*tp_setattr*/
387 0, /*tp_compare*/
388 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000389};
390
Barry Warsawc3573251996-12-20 21:56:07 +0000391/* reference counting invariants:
392 pattern: borrowed
393 translate: borrowed
394 givenpat: borrowed
395 groupindex: transferred
396*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000397static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000398newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000399 PyObject *pattern;
400 PyObject *translate;
401 PyObject *givenpat;
402 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000403{
404 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000405 char *pat;
406 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000407
Barry Warsawc3573251996-12-20 21:56:07 +0000408 if (!PyArg_Parse(pattern, "s#", &pat, &size))
409 return NULL;
410
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000411 if (translate != NULL && PyString_Size(translate) != 256) {
412 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000413 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000414 return NULL;
415 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000416 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000417 if (re != NULL) {
418 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000419 re->re_patbuf.buffer = NULL;
420 re->re_patbuf.allocated = 0;
421 re->re_patbuf.fastmap = re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000422 if (translate) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000423 re->re_patbuf.translate = PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000424 if (!re->re_patbuf.translate)
425 goto finally;
426 Py_INCREF(translate);
427 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000428 else
429 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000430 re->re_translate = translate;
431 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000432 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000433 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000434 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000435 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000436 re->re_givenpat = givenpat;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000437 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000438 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000439 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000440 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000441 }
442 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000443 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000444 finally:
445 Py_DECREF(re);
446 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000447}
448
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000449static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000450regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000451 PyObject *self;
452 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000453{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000454 PyObject *pat = NULL;
455 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000456
457 if (!PyArg_ParseTuple(args, "S|S", &pat, &tran))
458 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000459 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000460}
461
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000462static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000463symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000464 PyObject *pattern;
465 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000466{
Barry Warsawc3573251996-12-20 21:56:07 +0000467 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000468 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000469 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000470 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000471 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000472 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000473 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
474
Barry Warsawc3573251996-12-20 21:56:07 +0000475 if (!(opat = PyString_AsString(pattern)))
476 return NULL;
477
478 if ((sz = PyString_Size(pattern)) < 0)
479 return NULL;
480
481 oend = opat + sz;
482 o = opat;
483
Guido van Rossumab28c561996-06-11 18:33:14 +0000484 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000485 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000486 return pattern;
487 }
488
Barry Warsawc3573251996-12-20 21:56:07 +0000489 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
490 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000491 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000492
493 while (o < oend) {
494 if (*o == '(' && escaped == require_escape) {
495 char *backtrack;
496 escaped = 0;
497 ++group_count;
498 *n++ = *o;
499 if (++o >= oend || *o != '<')
500 continue;
501 /* *o == '<' */
502 if (o+1 < oend && *(o+1) == '>')
503 continue;
504 backtrack = o;
505 g = name_buf;
506 for (++o; o < oend;) {
507 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000508 PyObject *group_name = NULL;
509 PyObject *group_index = NULL;
510 *g++ = '\0';
511 group_name = PyString_FromString(name_buf);
512 group_index = PyInt_FromLong(group_count);
513 if (group_name == NULL ||
514 group_index == NULL ||
515 PyDict_SetItem(gdict, group_name,
516 group_index) != 0)
517 {
518 Py_XDECREF(group_name);
519 Py_XDECREF(group_index);
520 Py_XDECREF(npattern);
521 return NULL;
522 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000523 Py_DECREF(group_name);
524 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000525 ++o; /* eat the '>' */
526 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000527 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000528 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000529 o = backtrack;
530 break;
531 }
532 *g++ = *o++;
533 }
534 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000535 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000536 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000537 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000538 *n++ = *o;
539 while (o < oend && *o != ']') {
540 ++o;
541 *n++ = *o;
542 }
543 if (o < oend)
544 ++o;
545 }
546 else if (*o == '\\') {
547 escaped = 1;
548 *n++ = *o;
549 ++o;
550 }
551 else {
552 escaped = 0;
553 *n++ = *o;
554 ++o;
555 }
556 }
557
Barry Warsawc3573251996-12-20 21:56:07 +0000558 if (!(v = PyString_AsString(npattern))) {
559 Py_DECREF(npattern);
560 return NULL;
561 }
562 /* _PyString_Resize() decrements npattern on failure */
563 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000564 return npattern;
565 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000566 return NULL;
567 }
568
569}
570
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000571static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000572regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000573 PyObject *self;
574 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000575{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000576 PyObject *pattern;
577 PyObject *tran = NULL;
578 PyObject *gdict = NULL;
579 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000580 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000581
582 if (!PyArg_ParseTuple(args, "S|S", &pattern, &tran))
583 return NULL;
584
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000585 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000586 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000587 Py_DECREF(gdict);
588 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000589 return NULL;
590 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000591 retval = newregexobject(npattern, tran, pattern, gdict);
592 Py_DECREF(npattern);
593 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000594}
595
596
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000597static PyObject *cache_pat;
598static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000599
600static int
601update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000602 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000603{
Barry Warsawc3573251996-12-20 21:56:07 +0000604 PyObject *tuple = Py_BuildValue("(O)", pat);
605 int status = 0;
606
607 if (!tuple)
608 return -1;
609
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000610 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000611 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000612 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000613 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000614 cache_prog = regex_compile((PyObject *)NULL, tuple);
615 if (cache_prog == NULL) {
616 status = -1;
617 goto finally;
618 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000619 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000620 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000621 }
Barry Warsawc3573251996-12-20 21:56:07 +0000622 finally:
623 Py_DECREF(tuple);
624 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000625}
626
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000627static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000628regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000629 PyObject *self;
630 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000631{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000632 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000633 PyObject *tuple, *v;
634
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000635 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000636 return NULL;
637 if (update_cache(pat) < 0)
638 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000639
640 if (!(tuple = Py_BuildValue("(S)", string)))
641 return NULL;
642 v = regobj_match((regexobject *)cache_prog, tuple);
643 Py_DECREF(tuple);
644 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000645}
646
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000647static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000648regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000649 PyObject *self;
650 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000651{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000652 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000653 PyObject *tuple, *v;
654
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000655 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000656 return NULL;
657 if (update_cache(pat) < 0)
658 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000659
660 if (!(tuple = Py_BuildValue("(S)", string)))
661 return NULL;
662 v = regobj_search((regexobject *)cache_prog, tuple);
663 Py_DECREF(tuple);
664 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000665}
666
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000667static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000668regex_set_syntax(self, args)
Barry Warsawc3573251996-12-20 21:56:07 +0000669 PyObject *self;
670 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000671{
672 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000673 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000674 return NULL;
675 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000676 /* wipe the global pattern cache */
677 Py_XDECREF(cache_pat);
678 cache_pat = NULL;
679 Py_XDECREF(cache_prog);
680 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000681 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000682}
683
Barry Warsaw909d7c31997-02-18 18:48:50 +0000684static PyObject *
685regex_get_syntax(self, args)
686 PyObject *self;
687 PyObject *args;
688{
689 if (!PyArg_Parse(args, ""))
690 return NULL;
691 return PyInt_FromLong((long)re_syntax);
692}
693
694
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000695static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000696 {"compile", regex_compile, 1},
697 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000698 {"match", regex_match, 0},
699 {"search", regex_search, 0},
700 {"set_syntax", regex_set_syntax, 0},
Barry Warsaw909d7c31997-02-18 18:48:50 +0000701 {"get_syntax", regex_get_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000702 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000703};
704
Guido van Rossum8f3032d1996-08-19 22:03:12 +0000705void
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000706initregex()
707{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000708 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000709 int i;
710 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000711
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000712 m = Py_InitModule("regex", regex_global_methods);
713 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000714
715 /* Initialize regex.error exception */
Barry Warsawc3573251996-12-20 21:56:07 +0000716 v = RegexError = PyString_FromString("regex.error");
717 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
718 goto finally;
719
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000720 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000721 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
722 goto finally;
723
724 if (!(s = PyString_AsString(v)))
725 goto finally;
726
727 for (i = 0; i < 256; i++) {
728 if (isupper(i))
729 s[i] = tolower(i);
730 else
731 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000732 }
Barry Warsawc3573251996-12-20 21:56:07 +0000733 if (PyDict_SetItemString(d, "casefold", v) < 0)
734 goto finally;
735 Py_DECREF(v);
736
737 if (!PyErr_Occurred())
738 return;
739 finally:
740 Py_FatalError("can't initialize regex module");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000741}