blob: 2fb41982077dde2d5bae58de6cff7908d0086e7d [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00006
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000010
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000011#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000012
Guido van Rossuma376cc51996-12-05 23:43:35 +000013#include <ctype.h>
14
Guido van Rossum1cab95c1992-01-19 16:31:57 +000015#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000016
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000017static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000018
19typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000020 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021 struct re_pattern_buffer re_patbuf; /* The compiled expression */
22 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000023 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000024 PyObject *re_translate; /* String object for translate table */
25 PyObject *re_lastok; /* String object last matched/searched */
26 PyObject *re_groupindex; /* Group name to index dictionary */
27 PyObject *re_givenpat; /* Pattern with symbolic groups */
28 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000029} regexobject;
30
31/* Regex object methods */
32
33static void
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000034reg_dealloc(regexobject *re)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000035{
Guido van Rossumb18618d2000-05-03 23:44:39 +000036 if (re->re_patbuf.buffer)
Vladimir Marangozov9e3d73a2000-07-12 00:49:17 +000037 free(re->re_patbuf.buffer);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000038 Py_XDECREF(re->re_translate);
39 Py_XDECREF(re->re_lastok);
40 Py_XDECREF(re->re_groupindex);
41 Py_XDECREF(re->re_givenpat);
42 Py_XDECREF(re->re_realpat);
Guido van Rossumb18618d2000-05-03 23:44:39 +000043 PyObject_Del(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044}
45
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000046static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000047makeresult(struct re_registers *regs)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048{
Guido van Rossumc1962021996-10-08 14:18:42 +000049 PyObject *v;
50 int i;
51 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000052
Guido van Rossumc1962021996-10-08 14:18:42 +000053 if (filler == NULL) {
54 filler = Py_BuildValue("(ii)", -1, -1);
55 if (filler == NULL)
56 return NULL;
57 }
58 v = PyTuple_New(RE_NREGS);
59 if (v == NULL)
60 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000061
Guido van Rossumc1962021996-10-08 14:18:42 +000062 for (i = 0; i < RE_NREGS; i++) {
63 int lo = regs->start[i];
64 int hi = regs->end[i];
65 PyObject *w;
66 if (lo == -1 && hi == -1) {
67 w = filler;
68 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000069 }
Guido van Rossumc1962021996-10-08 14:18:42 +000070 else
71 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +000072 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
73 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +000074 return NULL;
75 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000076 }
77 return v;
78}
79
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000080static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000081regobj_match(regexobject *re, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000082{
Guido van Rossum4a807f51997-05-12 16:04:09 +000083 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000084 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000085 int size;
Barry Warsawc3573251996-12-20 21:56:07 +000086 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000087 int result;
Barry Warsawc3573251996-12-20 21:56:07 +000088
Guido van Rossum43713e52000-02-29 13:59:29 +000089 if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +000090 return NULL;
Guido van Rossum7e488981998-10-08 02:25:24 +000091 if (!PyArg_Parse(argstring, "t#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +000092 return NULL;
93
Guido van Rossum36d330b1993-02-21 20:12:16 +000094 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000095 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +000096 return NULL;
97 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000098 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +000099 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000100 result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
Guido van Rossum0318bd61997-08-14 14:35:12 +0000101 &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000102 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000103 /* Serious failure of some sort; if re_match didn't
104 set an exception, raise a generic error */
105 if (!PyErr_Occurred())
106 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000107 return NULL;
108 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000109 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000110 Py_INCREF(argstring);
111 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000112 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000113 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000114}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000115
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000116static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000117regobj_search(regexobject *re, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000118{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000119 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000120 char *buffer;
121 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000122 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000123 int range;
124 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000125
Guido van Rossum43713e52000-02-29 13:59:29 +0000126 if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +0000127 return NULL;
Guido van Rossum43713e52000-02-29 13:59:29 +0000128 if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000129 return NULL;
130
Guido van Rossum36d330b1993-02-21 20:12:16 +0000131 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000132 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000133 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000134 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000135 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
136 the implementation don't match: the documentation states that
137 |range| positions are tried, while the code tries |range|+1
138 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000139 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000140 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000141 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000142 result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000143 &re->re_regs);
144 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000145 /* Serious failure of some sort; if re_match didn't
146 set an exception, raise a generic error */
147 if (!PyErr_Occurred())
148 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000149 return NULL;
150 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000151 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000152 Py_INCREF(argstring);
153 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000154 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000155 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000156}
157
Barry Warsawc3573251996-12-20 21:56:07 +0000158/* get the group from the regex where index can be a string (group name) or
159 an integer index [0 .. 99]
160 */
161static PyObject*
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000162group_from_index(regexobject *re, PyObject *index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000163{
164 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000165 char *v;
166
167 if (PyString_Check(index))
168 if (re->re_groupindex == NULL ||
169 !(index = PyDict_GetItem(re->re_groupindex, index)))
170 {
171 PyErr_SetString(RegexError,
172 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000173 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000174 }
Barry Warsawc3573251996-12-20 21:56:07 +0000175
176 i = PyInt_AsLong(index);
177 if (i == -1 && PyErr_Occurred())
178 return NULL;
179
Guido van Rossum36d330b1993-02-21 20:12:16 +0000180 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000181 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000182 return NULL;
183 }
184 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000185 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000186 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000187 return NULL;
188 }
189 a = re->re_regs.start[i];
190 b = re->re_regs.end[i];
191 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000192 Py_INCREF(Py_None);
193 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000194 }
Barry Warsawc3573251996-12-20 21:56:07 +0000195
196 if (!(v = PyString_AsString(re->re_lastok)))
197 return NULL;
198
199 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000200}
201
Barry Warsawc3573251996-12-20 21:56:07 +0000202
203static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000204regobj_group(regexobject *re, PyObject *args)
Barry Warsawc3573251996-12-20 21:56:07 +0000205{
206 int n = PyTuple_Size(args);
207 int i;
208 PyObject *res = NULL;
209
210 if (n < 0)
211 return NULL;
212 if (n == 0) {
213 PyErr_SetString(PyExc_TypeError, "not enough arguments");
214 return NULL;
215 }
216 if (n == 1) {
217 /* return value is a single string */
218 PyObject *index = PyTuple_GetItem(args, 0);
219 if (!index)
220 return NULL;
221
222 return group_from_index(re, index);
223 }
224
225 /* return value is a tuple */
226 if (!(res = PyTuple_New(n)))
227 return NULL;
228
229 for (i = 0; i < n; i++) {
230 PyObject *index = PyTuple_GetItem(args, i);
231 PyObject *group = NULL;
232
233 if (!index)
234 goto finally;
235 if (!(group = group_from_index(re, index)))
236 goto finally;
237 if (PyTuple_SetItem(res, i, group) < 0)
238 goto finally;
239 }
240 return res;
241
242 finally:
243 Py_DECREF(res);
244 return NULL;
245}
246
247
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000248static struct PyMethodDef reg_methods[] = {
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000249 {"match", (PyCFunction)regobj_match, METH_VARARGS},
250 {"search", (PyCFunction)regobj_search, METH_VARARGS},
251 {"group", (PyCFunction)regobj_group, METH_VARARGS},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000252 {NULL, NULL} /* sentinel */
253};
254
Barry Warsawc3573251996-12-20 21:56:07 +0000255
256
257static char* members[] = {
258 "last", "regs", "translate",
259 "groupindex", "realpat", "givenpat",
260 NULL
261};
262
263
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000264static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000265regobj_getattr(regexobject *re, char *name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000266{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000267 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000268 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000269 Py_INCREF(Py_None);
270 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000271 }
272 return makeresult(&re->re_regs);
273 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000274 if (strcmp(name, "last") == 0) {
275 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000276 Py_INCREF(Py_None);
277 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000278 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000279 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000280 return re->re_lastok;
281 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000282 if (strcmp(name, "translate") == 0) {
283 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000284 Py_INCREF(Py_None);
285 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000286 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000287 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000288 return re->re_translate;
289 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000290 if (strcmp(name, "groupindex") == 0) {
291 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000292 Py_INCREF(Py_None);
293 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000294 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000295 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000296 return re->re_groupindex;
297 }
298 if (strcmp(name, "realpat") == 0) {
299 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000300 Py_INCREF(Py_None);
301 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000302 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000303 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000304 return re->re_realpat;
305 }
306 if (strcmp(name, "givenpat") == 0) {
307 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000308 Py_INCREF(Py_None);
309 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000310 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000311 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000312 return re->re_givenpat;
313 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000314 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000315 int i = 0;
316 PyObject *list = NULL;
317
318 /* okay, so it's unlikely this list will change that often.
319 still, it's easier to change it in just one place.
320 */
321 while (members[i])
322 i++;
323 if (!(list = PyList_New(i)))
324 return NULL;
325
326 i = 0;
327 while (members[i]) {
328 PyObject* v = PyString_FromString(members[i]);
329 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000330 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000331 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000332 }
Barry Warsawc3573251996-12-20 21:56:07 +0000333 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000334 }
335 return list;
336 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000337 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000338}
339
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000340static PyTypeObject Regextype = {
Guido van Rossuma120ffc2001-01-22 15:29:14 +0000341 PyObject_HEAD_INIT(NULL)
Barry Warsawc3573251996-12-20 21:56:07 +0000342 0, /*ob_size*/
Guido van Rossum14648392001-12-08 18:02:58 +0000343 "regex.regex", /*tp_name*/
Barry Warsawc3573251996-12-20 21:56:07 +0000344 sizeof(regexobject), /*tp_size*/
345 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000346 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000347 (destructor)reg_dealloc, /*tp_dealloc*/
348 0, /*tp_print*/
349 (getattrfunc)regobj_getattr, /*tp_getattr*/
350 0, /*tp_setattr*/
351 0, /*tp_compare*/
352 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000353};
354
Barry Warsawc3573251996-12-20 21:56:07 +0000355/* reference counting invariants:
356 pattern: borrowed
357 translate: borrowed
358 givenpat: borrowed
359 groupindex: transferred
360*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000361static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000362newregexobject(PyObject *pattern, PyObject *translate, PyObject *givenpat, PyObject *groupindex)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000363{
364 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000365 char *pat;
366 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000367
Guido van Rossum7e488981998-10-08 02:25:24 +0000368 if (!PyArg_Parse(pattern, "t#", &pat, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000369 return NULL;
370
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000371 if (translate != NULL && PyString_Size(translate) != 256) {
372 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000373 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000374 return NULL;
375 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000376 re = PyObject_New(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000377 if (re != NULL) {
378 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000379 re->re_patbuf.buffer = NULL;
380 re->re_patbuf.allocated = 0;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000381 re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000382 if (translate) {
Guido van Rossumed2554a1997-08-18 15:31:24 +0000383 re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000384 if (!re->re_patbuf.translate)
385 goto finally;
386 Py_INCREF(translate);
387 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000388 else
389 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000390 re->re_translate = translate;
391 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000392 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000393 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000394 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000395 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000396 re->re_givenpat = givenpat;
Guido van Rossumd19c04a1997-09-03 00:47:36 +0000397 error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000398 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000399 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000400 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000401 }
402 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000403 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000404 finally:
405 Py_DECREF(re);
406 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000407}
408
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000409static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000410regex_compile(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000411{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000412 PyObject *pat = NULL;
413 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000414
Guido van Rossum43713e52000-02-29 13:59:29 +0000415 if (!PyArg_ParseTuple(args, "S|S:compile", &pat, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000416 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000417 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000418}
419
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000420static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000421symcomp(PyObject *pattern, PyObject *gdict)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000422{
Barry Warsawc3573251996-12-20 21:56:07 +0000423 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000424 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000425 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000426 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000427 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000428 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000429 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
430
Barry Warsawc3573251996-12-20 21:56:07 +0000431 if (!(opat = PyString_AsString(pattern)))
432 return NULL;
433
434 if ((sz = PyString_Size(pattern)) < 0)
435 return NULL;
436
437 oend = opat + sz;
438 o = opat;
439
Guido van Rossumab28c561996-06-11 18:33:14 +0000440 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000441 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000442 return pattern;
443 }
444
Barry Warsawc3573251996-12-20 21:56:07 +0000445 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
446 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000447 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000448
449 while (o < oend) {
450 if (*o == '(' && escaped == require_escape) {
451 char *backtrack;
452 escaped = 0;
453 ++group_count;
454 *n++ = *o;
455 if (++o >= oend || *o != '<')
456 continue;
457 /* *o == '<' */
458 if (o+1 < oend && *(o+1) == '>')
459 continue;
460 backtrack = o;
461 g = name_buf;
462 for (++o; o < oend;) {
463 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000464 PyObject *group_name = NULL;
465 PyObject *group_index = NULL;
466 *g++ = '\0';
467 group_name = PyString_FromString(name_buf);
468 group_index = PyInt_FromLong(group_count);
469 if (group_name == NULL ||
470 group_index == NULL ||
471 PyDict_SetItem(gdict, group_name,
472 group_index) != 0)
473 {
474 Py_XDECREF(group_name);
475 Py_XDECREF(group_index);
476 Py_XDECREF(npattern);
477 return NULL;
478 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000479 Py_DECREF(group_name);
480 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000481 ++o; /* eat the '>' */
482 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000483 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000484 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000485 o = backtrack;
486 break;
487 }
488 *g++ = *o++;
489 }
490 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000491 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000492 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000493 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000494 *n++ = *o;
495 while (o < oend && *o != ']') {
496 ++o;
497 *n++ = *o;
498 }
499 if (o < oend)
500 ++o;
501 }
502 else if (*o == '\\') {
503 escaped = 1;
504 *n++ = *o;
505 ++o;
506 }
507 else {
508 escaped = 0;
509 *n++ = *o;
510 ++o;
511 }
512 }
513
Barry Warsawc3573251996-12-20 21:56:07 +0000514 if (!(v = PyString_AsString(npattern))) {
515 Py_DECREF(npattern);
516 return NULL;
517 }
518 /* _PyString_Resize() decrements npattern on failure */
Tim Peters5de98422002-04-27 18:44:32 +0000519 _PyString_Resize(&npattern, n - v);
520 return npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000521
522}
523
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000524static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000525regex_symcomp(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000526{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000527 PyObject *pattern;
528 PyObject *tran = NULL;
529 PyObject *gdict = NULL;
530 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000531 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000532
Guido van Rossum43713e52000-02-29 13:59:29 +0000533 if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000534 return NULL;
535
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000536 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000537 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Neal Norwitz60da3162006-03-07 04:48:24 +0000538 Py_XDECREF(gdict);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000539 return NULL;
540 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000541 retval = newregexobject(npattern, tran, pattern, gdict);
542 Py_DECREF(npattern);
543 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000544}
545
546
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000547static PyObject *cache_pat;
548static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000549
550static int
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000551update_cache(PyObject *pat)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000552{
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000553 PyObject *tuple = PyTuple_Pack(1, pat);
Barry Warsawc3573251996-12-20 21:56:07 +0000554 int status = 0;
555
556 if (!tuple)
557 return -1;
558
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000559 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000560 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000561 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000562 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000563 cache_prog = regex_compile((PyObject *)NULL, tuple);
564 if (cache_prog == NULL) {
565 status = -1;
566 goto finally;
567 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000568 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000569 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000570 }
Barry Warsawc3573251996-12-20 21:56:07 +0000571 finally:
572 Py_DECREF(tuple);
573 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000574}
575
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000576static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000577regex_match(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000578{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000579 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000580 PyObject *tuple, *v;
581
Neal Norwitzba3a16c2002-03-31 15:27:00 +0000582 if (!PyArg_ParseTuple(args, "SS:match", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000583 return NULL;
584 if (update_cache(pat) < 0)
585 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000586
587 if (!(tuple = Py_BuildValue("(S)", string)))
588 return NULL;
589 v = regobj_match((regexobject *)cache_prog, tuple);
590 Py_DECREF(tuple);
591 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000592}
593
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000594static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000595regex_search(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000596{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000597 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000598 PyObject *tuple, *v;
599
Neal Norwitzba3a16c2002-03-31 15:27:00 +0000600 if (!PyArg_ParseTuple(args, "SS:search", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000601 return NULL;
602 if (update_cache(pat) < 0)
603 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000604
605 if (!(tuple = Py_BuildValue("(S)", string)))
606 return NULL;
607 v = regobj_search((regexobject *)cache_prog, tuple);
608 Py_DECREF(tuple);
609 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000610}
611
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000612static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000613regex_set_syntax(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000614{
615 int syntax;
Neal Norwitzba3a16c2002-03-31 15:27:00 +0000616 if (!PyArg_ParseTuple(args, "i:set_syntax", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000617 return NULL;
618 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000619 /* wipe the global pattern cache */
620 Py_XDECREF(cache_pat);
621 cache_pat = NULL;
622 Py_XDECREF(cache_prog);
623 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000624 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000625}
626
Barry Warsaw909d7c31997-02-18 18:48:50 +0000627static PyObject *
Neal Norwitzba3a16c2002-03-31 15:27:00 +0000628regex_get_syntax(PyObject *self)
Barry Warsaw909d7c31997-02-18 18:48:50 +0000629{
Barry Warsaw909d7c31997-02-18 18:48:50 +0000630 return PyInt_FromLong((long)re_syntax);
631}
632
633
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000634static struct PyMethodDef regex_global_methods[] = {
Martin v. Löwis43b936d2002-01-17 23:15:58 +0000635 {"compile", regex_compile, METH_VARARGS},
636 {"symcomp", regex_symcomp, METH_VARARGS},
Neal Norwitzba3a16c2002-03-31 15:27:00 +0000637 {"match", regex_match, METH_VARARGS},
638 {"search", regex_search, METH_VARARGS},
639 {"set_syntax", regex_set_syntax, METH_VARARGS},
640 {"get_syntax", (PyCFunction)regex_get_syntax, METH_NOARGS},
Barry Warsawc3573251996-12-20 21:56:07 +0000641 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000642};
643
Mark Hammondfe51c6d2002-08-02 02:27:13 +0000644PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000645initregex(void)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000646{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000647 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000648 int i;
649 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000650
Guido van Rossuma120ffc2001-01-22 15:29:14 +0000651 /* Initialize object type */
652 Regextype.ob_type = &PyType_Type;
653
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000654 m = Py_InitModule("regex", regex_global_methods);
Neal Norwitz1ac754f2006-01-19 06:09:39 +0000655 if (m == NULL)
656 return;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000657 d = PyModule_GetDict(m);
Guido van Rossumb1d13612000-12-19 18:21:39 +0000658
Thomas Wouters4ccf1192001-07-09 10:45:31 +0000659 if (PyErr_Warn(PyExc_DeprecationWarning,
660 "the regex module is deprecated; "
661 "please use the re module") < 0)
Tim Peters06e415f2001-07-09 18:15:38 +0000662 return;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000663
664 /* Initialize regex.error exception */
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000665 v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
Barry Warsawc3573251996-12-20 21:56:07 +0000666 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
667 goto finally;
668
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000669 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000670 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
671 goto finally;
672
673 if (!(s = PyString_AsString(v)))
674 goto finally;
675
676 for (i = 0; i < 256; i++) {
677 if (isupper(i))
678 s[i] = tolower(i);
679 else
680 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000681 }
Barry Warsawc3573251996-12-20 21:56:07 +0000682 if (PyDict_SetItemString(d, "casefold", v) < 0)
683 goto finally;
684 Py_DECREF(v);
685
686 if (!PyErr_Occurred())
687 return;
688 finally:
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000689 /* Nothing */ ;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000690}