blob: ff3b8950dd47916d7beced1b27088b2ee9a3d432 [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
/***********************************************************
Copyright (c) 2000, BeOpen.com.
Copyright (c) 1995-2000, Corporation for National Research Initiatives.
Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
All rights reserved.

See the file "Misc/COPYRIGHT" for information on usage and
redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
******************************************************************/
15
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000019
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000020#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021
Guido van Rossuma376cc51996-12-05 23:43:35 +000022#include <ctype.h>
23
Guido van Rossum1cab95c1992-01-19 16:31:57 +000024#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000025
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000026static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000027
28typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000029 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000030 struct re_pattern_buffer re_patbuf; /* The compiled expression */
31 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000032 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000033 PyObject *re_translate; /* String object for translate table */
34 PyObject *re_lastok; /* String object last matched/searched */
35 PyObject *re_groupindex; /* Group name to index dictionary */
36 PyObject *re_givenpat; /* Pattern with symbolic groups */
37 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000038} regexobject;
39
40/* Regex object methods */
41
42static void
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000043reg_dealloc(regexobject *re)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044{
Guido van Rossumb18618d2000-05-03 23:44:39 +000045 if (re->re_patbuf.buffer)
Vladimir Marangozov9e3d73a2000-07-12 00:49:17 +000046 free(re->re_patbuf.buffer);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000047 Py_XDECREF(re->re_translate);
48 Py_XDECREF(re->re_lastok);
49 Py_XDECREF(re->re_groupindex);
50 Py_XDECREF(re->re_givenpat);
51 Py_XDECREF(re->re_realpat);
Guido van Rossumb18618d2000-05-03 23:44:39 +000052 PyObject_Del(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000053}
54
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000055static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000056makeresult(struct re_registers *regs)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000057{
Guido van Rossumc1962021996-10-08 14:18:42 +000058 PyObject *v;
59 int i;
60 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000061
Guido van Rossumc1962021996-10-08 14:18:42 +000062 if (filler == NULL) {
63 filler = Py_BuildValue("(ii)", -1, -1);
64 if (filler == NULL)
65 return NULL;
66 }
67 v = PyTuple_New(RE_NREGS);
68 if (v == NULL)
69 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000070
Guido van Rossumc1962021996-10-08 14:18:42 +000071 for (i = 0; i < RE_NREGS; i++) {
72 int lo = regs->start[i];
73 int hi = regs->end[i];
74 PyObject *w;
75 if (lo == -1 && hi == -1) {
76 w = filler;
77 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000078 }
Guido van Rossumc1962021996-10-08 14:18:42 +000079 else
80 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +000081 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
82 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +000083 return NULL;
84 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000085 }
86 return v;
87}
88
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000089static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000090regobj_match(regexobject *re, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000091{
Guido van Rossum4a807f51997-05-12 16:04:09 +000092 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000093 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000094 int size;
Barry Warsawc3573251996-12-20 21:56:07 +000095 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000096 int result;
Barry Warsawc3573251996-12-20 21:56:07 +000097
Guido van Rossum43713e52000-02-29 13:59:29 +000098 if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +000099 return NULL;
Guido van Rossum7e488981998-10-08 02:25:24 +0000100 if (!PyArg_Parse(argstring, "t#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000101 return NULL;
102
Guido van Rossum36d330b1993-02-21 20:12:16 +0000103 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000104 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000105 return NULL;
106 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000107 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000108 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000109 result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
Guido van Rossum0318bd61997-08-14 14:35:12 +0000110 &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000111 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000112 /* Serious failure of some sort; if re_match didn't
113 set an exception, raise a generic error */
114 if (!PyErr_Occurred())
115 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000116 return NULL;
117 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000118 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000119 Py_INCREF(argstring);
120 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000121 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000122 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000123}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000124
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000125static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000126regobj_search(regexobject *re, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000127{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000128 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000129 char *buffer;
130 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000131 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000132 int range;
133 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000134
Guido van Rossum43713e52000-02-29 13:59:29 +0000135 if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +0000136 return NULL;
Guido van Rossum43713e52000-02-29 13:59:29 +0000137 if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000138 return NULL;
139
Guido van Rossum36d330b1993-02-21 20:12:16 +0000140 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000141 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000142 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000143 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000144 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
145 the implementation don't match: the documentation states that
146 |range| positions are tried, while the code tries |range|+1
147 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000148 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000149 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000150 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000151 result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000152 &re->re_regs);
153 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000154 /* Serious failure of some sort; if re_match didn't
155 set an exception, raise a generic error */
156 if (!PyErr_Occurred())
157 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000158 return NULL;
159 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000160 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000161 Py_INCREF(argstring);
162 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000163 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000164 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000165}
166
Barry Warsawc3573251996-12-20 21:56:07 +0000167/* get the group from the regex where index can be a string (group name) or
168 an integer index [0 .. 99]
169 */
170static PyObject*
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000171group_from_index(regexobject *re, PyObject *index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000172{
173 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000174 char *v;
175
176 if (PyString_Check(index))
177 if (re->re_groupindex == NULL ||
178 !(index = PyDict_GetItem(re->re_groupindex, index)))
179 {
180 PyErr_SetString(RegexError,
181 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000182 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000183 }
Barry Warsawc3573251996-12-20 21:56:07 +0000184
185 i = PyInt_AsLong(index);
186 if (i == -1 && PyErr_Occurred())
187 return NULL;
188
Guido van Rossum36d330b1993-02-21 20:12:16 +0000189 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000190 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000191 return NULL;
192 }
193 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000194 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000195 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000196 return NULL;
197 }
198 a = re->re_regs.start[i];
199 b = re->re_regs.end[i];
200 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000201 Py_INCREF(Py_None);
202 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000203 }
Barry Warsawc3573251996-12-20 21:56:07 +0000204
205 if (!(v = PyString_AsString(re->re_lastok)))
206 return NULL;
207
208 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000209}
210
Barry Warsawc3573251996-12-20 21:56:07 +0000211
212static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000213regobj_group(regexobject *re, PyObject *args)
Barry Warsawc3573251996-12-20 21:56:07 +0000214{
215 int n = PyTuple_Size(args);
216 int i;
217 PyObject *res = NULL;
218
219 if (n < 0)
220 return NULL;
221 if (n == 0) {
222 PyErr_SetString(PyExc_TypeError, "not enough arguments");
223 return NULL;
224 }
225 if (n == 1) {
226 /* return value is a single string */
227 PyObject *index = PyTuple_GetItem(args, 0);
228 if (!index)
229 return NULL;
230
231 return group_from_index(re, index);
232 }
233
234 /* return value is a tuple */
235 if (!(res = PyTuple_New(n)))
236 return NULL;
237
238 for (i = 0; i < n; i++) {
239 PyObject *index = PyTuple_GetItem(args, i);
240 PyObject *group = NULL;
241
242 if (!index)
243 goto finally;
244 if (!(group = group_from_index(re, index)))
245 goto finally;
246 if (PyTuple_SetItem(res, i, group) < 0)
247 goto finally;
248 }
249 return res;
250
251 finally:
252 Py_DECREF(res);
253 return NULL;
254}
255
256
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000257static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000258 {"match", (PyCFunction)regobj_match, 1},
259 {"search", (PyCFunction)regobj_search, 1},
260 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000261 {NULL, NULL} /* sentinel */
262};
263
Barry Warsawc3573251996-12-20 21:56:07 +0000264
265
266static char* members[] = {
267 "last", "regs", "translate",
268 "groupindex", "realpat", "givenpat",
269 NULL
270};
271
272
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000273static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000274regobj_getattr(regexobject *re, char *name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000275{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000276 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000277 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000278 Py_INCREF(Py_None);
279 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000280 }
281 return makeresult(&re->re_regs);
282 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000283 if (strcmp(name, "last") == 0) {
284 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000285 Py_INCREF(Py_None);
286 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000287 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000288 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000289 return re->re_lastok;
290 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000291 if (strcmp(name, "translate") == 0) {
292 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000293 Py_INCREF(Py_None);
294 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000295 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000296 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000297 return re->re_translate;
298 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000299 if (strcmp(name, "groupindex") == 0) {
300 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000301 Py_INCREF(Py_None);
302 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000303 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000304 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000305 return re->re_groupindex;
306 }
307 if (strcmp(name, "realpat") == 0) {
308 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000309 Py_INCREF(Py_None);
310 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000311 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000312 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000313 return re->re_realpat;
314 }
315 if (strcmp(name, "givenpat") == 0) {
316 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000317 Py_INCREF(Py_None);
318 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000319 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000320 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000321 return re->re_givenpat;
322 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000323 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000324 int i = 0;
325 PyObject *list = NULL;
326
327 /* okay, so it's unlikely this list will change that often.
328 still, it's easier to change it in just one place.
329 */
330 while (members[i])
331 i++;
332 if (!(list = PyList_New(i)))
333 return NULL;
334
335 i = 0;
336 while (members[i]) {
337 PyObject* v = PyString_FromString(members[i]);
338 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000339 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000340 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000341 }
Barry Warsawc3573251996-12-20 21:56:07 +0000342 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000343 }
344 return list;
345 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000346 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000347}
348
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000349static PyTypeObject Regextype = {
350 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000351 0, /*ob_size*/
352 "regex", /*tp_name*/
353 sizeof(regexobject), /*tp_size*/
354 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000355 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000356 (destructor)reg_dealloc, /*tp_dealloc*/
357 0, /*tp_print*/
358 (getattrfunc)regobj_getattr, /*tp_getattr*/
359 0, /*tp_setattr*/
360 0, /*tp_compare*/
361 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000362};
363
Barry Warsawc3573251996-12-20 21:56:07 +0000364/* reference counting invariants:
365 pattern: borrowed
366 translate: borrowed
367 givenpat: borrowed
368 groupindex: transferred
369*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000370static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000371newregexobject(PyObject *pattern, PyObject *translate, PyObject *givenpat, PyObject *groupindex)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000372{
373 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000374 char *pat;
375 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000376
Guido van Rossum7e488981998-10-08 02:25:24 +0000377 if (!PyArg_Parse(pattern, "t#", &pat, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000378 return NULL;
379
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000380 if (translate != NULL && PyString_Size(translate) != 256) {
381 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000382 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000383 return NULL;
384 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000385 re = PyObject_New(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000386 if (re != NULL) {
387 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000388 re->re_patbuf.buffer = NULL;
389 re->re_patbuf.allocated = 0;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000390 re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000391 if (translate) {
Guido van Rossumed2554a1997-08-18 15:31:24 +0000392 re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000393 if (!re->re_patbuf.translate)
394 goto finally;
395 Py_INCREF(translate);
396 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000397 else
398 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000399 re->re_translate = translate;
400 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000401 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000402 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000403 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000404 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000405 re->re_givenpat = givenpat;
Guido van Rossumd19c04a1997-09-03 00:47:36 +0000406 error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000407 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000408 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000409 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000410 }
411 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000412 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000413 finally:
414 Py_DECREF(re);
415 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000416}
417
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000418static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000419regex_compile(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000420{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000421 PyObject *pat = NULL;
422 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000423
Guido van Rossum43713e52000-02-29 13:59:29 +0000424 if (!PyArg_ParseTuple(args, "S|S:compile", &pat, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000425 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000426 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000427}
428
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000429static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000430symcomp(PyObject *pattern, PyObject *gdict)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000431{
Barry Warsawc3573251996-12-20 21:56:07 +0000432 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000433 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000434 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000435 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000436 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000437 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000438 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
439
Barry Warsawc3573251996-12-20 21:56:07 +0000440 if (!(opat = PyString_AsString(pattern)))
441 return NULL;
442
443 if ((sz = PyString_Size(pattern)) < 0)
444 return NULL;
445
446 oend = opat + sz;
447 o = opat;
448
Guido van Rossumab28c561996-06-11 18:33:14 +0000449 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000450 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000451 return pattern;
452 }
453
Barry Warsawc3573251996-12-20 21:56:07 +0000454 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
455 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000456 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000457
458 while (o < oend) {
459 if (*o == '(' && escaped == require_escape) {
460 char *backtrack;
461 escaped = 0;
462 ++group_count;
463 *n++ = *o;
464 if (++o >= oend || *o != '<')
465 continue;
466 /* *o == '<' */
467 if (o+1 < oend && *(o+1) == '>')
468 continue;
469 backtrack = o;
470 g = name_buf;
471 for (++o; o < oend;) {
472 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000473 PyObject *group_name = NULL;
474 PyObject *group_index = NULL;
475 *g++ = '\0';
476 group_name = PyString_FromString(name_buf);
477 group_index = PyInt_FromLong(group_count);
478 if (group_name == NULL ||
479 group_index == NULL ||
480 PyDict_SetItem(gdict, group_name,
481 group_index) != 0)
482 {
483 Py_XDECREF(group_name);
484 Py_XDECREF(group_index);
485 Py_XDECREF(npattern);
486 return NULL;
487 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000488 Py_DECREF(group_name);
489 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000490 ++o; /* eat the '>' */
491 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000492 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000493 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000494 o = backtrack;
495 break;
496 }
497 *g++ = *o++;
498 }
499 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000500 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000501 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000502 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000503 *n++ = *o;
504 while (o < oend && *o != ']') {
505 ++o;
506 *n++ = *o;
507 }
508 if (o < oend)
509 ++o;
510 }
511 else if (*o == '\\') {
512 escaped = 1;
513 *n++ = *o;
514 ++o;
515 }
516 else {
517 escaped = 0;
518 *n++ = *o;
519 ++o;
520 }
521 }
522
Barry Warsawc3573251996-12-20 21:56:07 +0000523 if (!(v = PyString_AsString(npattern))) {
524 Py_DECREF(npattern);
525 return NULL;
526 }
527 /* _PyString_Resize() decrements npattern on failure */
528 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000529 return npattern;
530 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000531 return NULL;
532 }
533
534}
535
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000536static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000537regex_symcomp(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000538{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000539 PyObject *pattern;
540 PyObject *tran = NULL;
541 PyObject *gdict = NULL;
542 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000543 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000544
Guido van Rossum43713e52000-02-29 13:59:29 +0000545 if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000546 return NULL;
547
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000548 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000549 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000550 Py_DECREF(gdict);
551 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000552 return NULL;
553 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000554 retval = newregexobject(npattern, tran, pattern, gdict);
555 Py_DECREF(npattern);
556 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000557}
558
559
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000560static PyObject *cache_pat;
561static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000562
563static int
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000564update_cache(PyObject *pat)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000565{
Barry Warsawc3573251996-12-20 21:56:07 +0000566 PyObject *tuple = Py_BuildValue("(O)", pat);
567 int status = 0;
568
569 if (!tuple)
570 return -1;
571
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000572 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000573 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000574 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000575 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000576 cache_prog = regex_compile((PyObject *)NULL, tuple);
577 if (cache_prog == NULL) {
578 status = -1;
579 goto finally;
580 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000581 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000582 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000583 }
Barry Warsawc3573251996-12-20 21:56:07 +0000584 finally:
585 Py_DECREF(tuple);
586 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000587}
588
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000589static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000590regex_match(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000591{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000592 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000593 PyObject *tuple, *v;
594
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000595 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000596 return NULL;
597 if (update_cache(pat) < 0)
598 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000599
600 if (!(tuple = Py_BuildValue("(S)", string)))
601 return NULL;
602 v = regobj_match((regexobject *)cache_prog, tuple);
603 Py_DECREF(tuple);
604 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000605}
606
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000607static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000608regex_search(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000609{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000610 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000611 PyObject *tuple, *v;
612
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000613 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000614 return NULL;
615 if (update_cache(pat) < 0)
616 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000617
618 if (!(tuple = Py_BuildValue("(S)", string)))
619 return NULL;
620 v = regobj_search((regexobject *)cache_prog, tuple);
621 Py_DECREF(tuple);
622 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000623}
624
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000625static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000626regex_set_syntax(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000627{
628 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000629 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000630 return NULL;
631 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000632 /* wipe the global pattern cache */
633 Py_XDECREF(cache_pat);
634 cache_pat = NULL;
635 Py_XDECREF(cache_prog);
636 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000637 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000638}
639
Barry Warsaw909d7c31997-02-18 18:48:50 +0000640static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000641regex_get_syntax(PyObject *self, PyObject *args)
Barry Warsaw909d7c31997-02-18 18:48:50 +0000642{
643 if (!PyArg_Parse(args, ""))
644 return NULL;
645 return PyInt_FromLong((long)re_syntax);
646}
647
648
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000649static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000650 {"compile", regex_compile, 1},
651 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000652 {"match", regex_match, 0},
653 {"search", regex_search, 0},
654 {"set_syntax", regex_set_syntax, 0},
Barry Warsaw909d7c31997-02-18 18:48:50 +0000655 {"get_syntax", regex_get_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000656 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000657};
658
Guido van Rossum3886bb61998-12-04 18:50:17 +0000659DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000660initregex(void)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000661{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000662 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000663 int i;
664 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000665
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000666 m = Py_InitModule("regex", regex_global_methods);
667 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000668
669 /* Initialize regex.error exception */
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000670 v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
Barry Warsawc3573251996-12-20 21:56:07 +0000671 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
672 goto finally;
673
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000674 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000675 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
676 goto finally;
677
678 if (!(s = PyString_AsString(v)))
679 goto finally;
680
681 for (i = 0; i < 256; i++) {
682 if (isupper(i))
683 s[i] = tolower(i);
684 else
685 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000686 }
Barry Warsawc3573251996-12-20 21:56:07 +0000687 if (PyDict_SetItemString(d, "casefold", v) < 0)
688 goto finally;
689 Py_DECREF(v);
690
691 if (!PyErr_Occurred())
692 return;
693 finally:
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000694 /* Nothing */ ;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000695}