blob: 749de9e7ab48632ccfb5a82d235d9f51e6e38c99 [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00006
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000010
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000011#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000012
Guido van Rossuma376cc51996-12-05 23:43:35 +000013#include <ctype.h>
14
Guido van Rossum1cab95c1992-01-19 16:31:57 +000015#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000016
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000017static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000018
19typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000020 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021 struct re_pattern_buffer re_patbuf; /* The compiled expression */
22 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000023 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000024 PyObject *re_translate; /* String object for translate table */
25 PyObject *re_lastok; /* String object last matched/searched */
26 PyObject *re_groupindex; /* Group name to index dictionary */
27 PyObject *re_givenpat; /* Pattern with symbolic groups */
28 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000029} regexobject;
30
/* Regex object methods */
32
33static void
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000034reg_dealloc(regexobject *re)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000035{
Guido van Rossumb18618d2000-05-03 23:44:39 +000036 if (re->re_patbuf.buffer)
Vladimir Marangozov9e3d73a2000-07-12 00:49:17 +000037 free(re->re_patbuf.buffer);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000038 Py_XDECREF(re->re_translate);
39 Py_XDECREF(re->re_lastok);
40 Py_XDECREF(re->re_groupindex);
41 Py_XDECREF(re->re_givenpat);
42 Py_XDECREF(re->re_realpat);
Guido van Rossumb18618d2000-05-03 23:44:39 +000043 PyObject_Del(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044}
45
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000046static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000047makeresult(struct re_registers *regs)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048{
Guido van Rossumc1962021996-10-08 14:18:42 +000049 PyObject *v;
50 int i;
51 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000052
Guido van Rossumc1962021996-10-08 14:18:42 +000053 if (filler == NULL) {
54 filler = Py_BuildValue("(ii)", -1, -1);
55 if (filler == NULL)
56 return NULL;
57 }
58 v = PyTuple_New(RE_NREGS);
59 if (v == NULL)
60 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000061
Guido van Rossumc1962021996-10-08 14:18:42 +000062 for (i = 0; i < RE_NREGS; i++) {
63 int lo = regs->start[i];
64 int hi = regs->end[i];
65 PyObject *w;
66 if (lo == -1 && hi == -1) {
67 w = filler;
68 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000069 }
Guido van Rossumc1962021996-10-08 14:18:42 +000070 else
71 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +000072 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
73 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +000074 return NULL;
75 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000076 }
77 return v;
78}
79
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000080static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +000081regobj_match(regexobject *re, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000082{
Guido van Rossum4a807f51997-05-12 16:04:09 +000083 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000084 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000085 int size;
Barry Warsawc3573251996-12-20 21:56:07 +000086 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000087 int result;
Barry Warsawc3573251996-12-20 21:56:07 +000088
Guido van Rossum43713e52000-02-29 13:59:29 +000089 if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +000090 return NULL;
Guido van Rossum7e488981998-10-08 02:25:24 +000091 if (!PyArg_Parse(argstring, "t#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +000092 return NULL;
93
Guido van Rossum36d330b1993-02-21 20:12:16 +000094 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000095 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +000096 return NULL;
97 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000098 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +000099 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000100 result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
Guido van Rossum0318bd61997-08-14 14:35:12 +0000101 &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000102 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000103 /* Serious failure of some sort; if re_match didn't
104 set an exception, raise a generic error */
105 if (!PyErr_Occurred())
106 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000107 return NULL;
108 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000109 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000110 Py_INCREF(argstring);
111 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000112 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000113 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000114}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000115
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000116static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000117regobj_search(regexobject *re, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000118{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000119 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000120 char *buffer;
121 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000122 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000123 int range;
124 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000125
Guido van Rossum43713e52000-02-29 13:59:29 +0000126 if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +0000127 return NULL;
Guido van Rossum43713e52000-02-29 13:59:29 +0000128 if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000129 return NULL;
130
Guido van Rossum36d330b1993-02-21 20:12:16 +0000131 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000132 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000133 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000134 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000135 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
136 the implementation don't match: the documentation states that
137 |range| positions are tried, while the code tries |range|+1
138 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000139 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000140 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000141 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000142 result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000143 &re->re_regs);
144 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000145 /* Serious failure of some sort; if re_match didn't
146 set an exception, raise a generic error */
147 if (!PyErr_Occurred())
148 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000149 return NULL;
150 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000151 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000152 Py_INCREF(argstring);
153 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000154 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000155 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000156}
157
Barry Warsawc3573251996-12-20 21:56:07 +0000158/* get the group from the regex where index can be a string (group name) or
159 an integer index [0 .. 99]
160 */
161static PyObject*
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000162group_from_index(regexobject *re, PyObject *index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000163{
164 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000165 char *v;
166
167 if (PyString_Check(index))
168 if (re->re_groupindex == NULL ||
169 !(index = PyDict_GetItem(re->re_groupindex, index)))
170 {
171 PyErr_SetString(RegexError,
172 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000173 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000174 }
Barry Warsawc3573251996-12-20 21:56:07 +0000175
176 i = PyInt_AsLong(index);
177 if (i == -1 && PyErr_Occurred())
178 return NULL;
179
Guido van Rossum36d330b1993-02-21 20:12:16 +0000180 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000181 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000182 return NULL;
183 }
184 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000185 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000186 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000187 return NULL;
188 }
189 a = re->re_regs.start[i];
190 b = re->re_regs.end[i];
191 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000192 Py_INCREF(Py_None);
193 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000194 }
Barry Warsawc3573251996-12-20 21:56:07 +0000195
196 if (!(v = PyString_AsString(re->re_lastok)))
197 return NULL;
198
199 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000200}
201
Barry Warsawc3573251996-12-20 21:56:07 +0000202
203static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000204regobj_group(regexobject *re, PyObject *args)
Barry Warsawc3573251996-12-20 21:56:07 +0000205{
206 int n = PyTuple_Size(args);
207 int i;
208 PyObject *res = NULL;
209
210 if (n < 0)
211 return NULL;
212 if (n == 0) {
213 PyErr_SetString(PyExc_TypeError, "not enough arguments");
214 return NULL;
215 }
216 if (n == 1) {
217 /* return value is a single string */
218 PyObject *index = PyTuple_GetItem(args, 0);
219 if (!index)
220 return NULL;
221
222 return group_from_index(re, index);
223 }
224
225 /* return value is a tuple */
226 if (!(res = PyTuple_New(n)))
227 return NULL;
228
229 for (i = 0; i < n; i++) {
230 PyObject *index = PyTuple_GetItem(args, i);
231 PyObject *group = NULL;
232
233 if (!index)
234 goto finally;
235 if (!(group = group_from_index(re, index)))
236 goto finally;
237 if (PyTuple_SetItem(res, i, group) < 0)
238 goto finally;
239 }
240 return res;
241
242 finally:
243 Py_DECREF(res);
244 return NULL;
245}
246
247
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000248static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000249 {"match", (PyCFunction)regobj_match, 1},
250 {"search", (PyCFunction)regobj_search, 1},
251 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000252 {NULL, NULL} /* sentinel */
253};
254
Barry Warsawc3573251996-12-20 21:56:07 +0000255
256
257static char* members[] = {
258 "last", "regs", "translate",
259 "groupindex", "realpat", "givenpat",
260 NULL
261};
262
263
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000264static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000265regobj_getattr(regexobject *re, char *name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000266{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000267 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000268 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000269 Py_INCREF(Py_None);
270 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000271 }
272 return makeresult(&re->re_regs);
273 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000274 if (strcmp(name, "last") == 0) {
275 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000276 Py_INCREF(Py_None);
277 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000278 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000279 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000280 return re->re_lastok;
281 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000282 if (strcmp(name, "translate") == 0) {
283 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000284 Py_INCREF(Py_None);
285 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000286 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000287 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000288 return re->re_translate;
289 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000290 if (strcmp(name, "groupindex") == 0) {
291 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000292 Py_INCREF(Py_None);
293 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000294 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000295 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000296 return re->re_groupindex;
297 }
298 if (strcmp(name, "realpat") == 0) {
299 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000300 Py_INCREF(Py_None);
301 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000302 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000303 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000304 return re->re_realpat;
305 }
306 if (strcmp(name, "givenpat") == 0) {
307 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000308 Py_INCREF(Py_None);
309 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000310 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000311 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000312 return re->re_givenpat;
313 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000314 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000315 int i = 0;
316 PyObject *list = NULL;
317
318 /* okay, so it's unlikely this list will change that often.
319 still, it's easier to change it in just one place.
320 */
321 while (members[i])
322 i++;
323 if (!(list = PyList_New(i)))
324 return NULL;
325
326 i = 0;
327 while (members[i]) {
328 PyObject* v = PyString_FromString(members[i]);
329 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000330 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000331 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000332 }
Barry Warsawc3573251996-12-20 21:56:07 +0000333 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000334 }
335 return list;
336 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000337 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000338}
339
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000340static PyTypeObject Regextype = {
341 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000342 0, /*ob_size*/
343 "regex", /*tp_name*/
344 sizeof(regexobject), /*tp_size*/
345 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000346 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000347 (destructor)reg_dealloc, /*tp_dealloc*/
348 0, /*tp_print*/
349 (getattrfunc)regobj_getattr, /*tp_getattr*/
350 0, /*tp_setattr*/
351 0, /*tp_compare*/
352 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000353};
354
Barry Warsawc3573251996-12-20 21:56:07 +0000355/* reference counting invariants:
356 pattern: borrowed
357 translate: borrowed
358 givenpat: borrowed
359 groupindex: transferred
360*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000361static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000362newregexobject(PyObject *pattern, PyObject *translate, PyObject *givenpat, PyObject *groupindex)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000363{
364 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000365 char *pat;
366 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000367
Guido van Rossum7e488981998-10-08 02:25:24 +0000368 if (!PyArg_Parse(pattern, "t#", &pat, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000369 return NULL;
370
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000371 if (translate != NULL && PyString_Size(translate) != 256) {
372 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000373 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000374 return NULL;
375 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000376 re = PyObject_New(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000377 if (re != NULL) {
378 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000379 re->re_patbuf.buffer = NULL;
380 re->re_patbuf.allocated = 0;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000381 re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000382 if (translate) {
Guido van Rossumed2554a1997-08-18 15:31:24 +0000383 re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000384 if (!re->re_patbuf.translate)
385 goto finally;
386 Py_INCREF(translate);
387 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000388 else
389 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000390 re->re_translate = translate;
391 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000392 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000393 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000394 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000395 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000396 re->re_givenpat = givenpat;
Guido van Rossumd19c04a1997-09-03 00:47:36 +0000397 error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000398 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000399 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000400 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000401 }
402 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000403 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000404 finally:
405 Py_DECREF(re);
406 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000407}
408
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000409static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000410regex_compile(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000411{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000412 PyObject *pat = NULL;
413 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000414
Guido van Rossum43713e52000-02-29 13:59:29 +0000415 if (!PyArg_ParseTuple(args, "S|S:compile", &pat, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000416 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000417 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000418}
419
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000420static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000421symcomp(PyObject *pattern, PyObject *gdict)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000422{
Barry Warsawc3573251996-12-20 21:56:07 +0000423 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000424 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000425 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000426 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000427 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000428 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000429 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
430
Barry Warsawc3573251996-12-20 21:56:07 +0000431 if (!(opat = PyString_AsString(pattern)))
432 return NULL;
433
434 if ((sz = PyString_Size(pattern)) < 0)
435 return NULL;
436
437 oend = opat + sz;
438 o = opat;
439
Guido van Rossumab28c561996-06-11 18:33:14 +0000440 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000441 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000442 return pattern;
443 }
444
Barry Warsawc3573251996-12-20 21:56:07 +0000445 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
446 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000447 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000448
449 while (o < oend) {
450 if (*o == '(' && escaped == require_escape) {
451 char *backtrack;
452 escaped = 0;
453 ++group_count;
454 *n++ = *o;
455 if (++o >= oend || *o != '<')
456 continue;
457 /* *o == '<' */
458 if (o+1 < oend && *(o+1) == '>')
459 continue;
460 backtrack = o;
461 g = name_buf;
462 for (++o; o < oend;) {
463 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000464 PyObject *group_name = NULL;
465 PyObject *group_index = NULL;
466 *g++ = '\0';
467 group_name = PyString_FromString(name_buf);
468 group_index = PyInt_FromLong(group_count);
469 if (group_name == NULL ||
470 group_index == NULL ||
471 PyDict_SetItem(gdict, group_name,
472 group_index) != 0)
473 {
474 Py_XDECREF(group_name);
475 Py_XDECREF(group_index);
476 Py_XDECREF(npattern);
477 return NULL;
478 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000479 Py_DECREF(group_name);
480 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000481 ++o; /* eat the '>' */
482 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000483 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000484 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000485 o = backtrack;
486 break;
487 }
488 *g++ = *o++;
489 }
490 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000491 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000492 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000493 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000494 *n++ = *o;
495 while (o < oend && *o != ']') {
496 ++o;
497 *n++ = *o;
498 }
499 if (o < oend)
500 ++o;
501 }
502 else if (*o == '\\') {
503 escaped = 1;
504 *n++ = *o;
505 ++o;
506 }
507 else {
508 escaped = 0;
509 *n++ = *o;
510 ++o;
511 }
512 }
513
Barry Warsawc3573251996-12-20 21:56:07 +0000514 if (!(v = PyString_AsString(npattern))) {
515 Py_DECREF(npattern);
516 return NULL;
517 }
518 /* _PyString_Resize() decrements npattern on failure */
519 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000520 return npattern;
521 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000522 return NULL;
523 }
524
525}
526
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000527static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000528regex_symcomp(PyObject *self, PyObject *args)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000529{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000530 PyObject *pattern;
531 PyObject *tran = NULL;
532 PyObject *gdict = NULL;
533 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000534 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000535
Guido van Rossum43713e52000-02-29 13:59:29 +0000536 if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000537 return NULL;
538
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000539 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000540 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000541 Py_DECREF(gdict);
542 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000543 return NULL;
544 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000545 retval = newregexobject(npattern, tran, pattern, gdict);
546 Py_DECREF(npattern);
547 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000548}
549
550
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000551static PyObject *cache_pat;
552static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000553
554static int
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000555update_cache(PyObject *pat)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000556{
Barry Warsawc3573251996-12-20 21:56:07 +0000557 PyObject *tuple = Py_BuildValue("(O)", pat);
558 int status = 0;
559
560 if (!tuple)
561 return -1;
562
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000563 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000564 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000565 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000566 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000567 cache_prog = regex_compile((PyObject *)NULL, tuple);
568 if (cache_prog == NULL) {
569 status = -1;
570 goto finally;
571 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000572 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000573 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000574 }
Barry Warsawc3573251996-12-20 21:56:07 +0000575 finally:
576 Py_DECREF(tuple);
577 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000578}
579
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000580static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000581regex_match(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000582{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000583 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000584 PyObject *tuple, *v;
585
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000586 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000587 return NULL;
588 if (update_cache(pat) < 0)
589 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000590
591 if (!(tuple = Py_BuildValue("(S)", string)))
592 return NULL;
593 v = regobj_match((regexobject *)cache_prog, tuple);
594 Py_DECREF(tuple);
595 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000596}
597
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000598static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000599regex_search(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000600{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000601 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000602 PyObject *tuple, *v;
603
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000604 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000605 return NULL;
606 if (update_cache(pat) < 0)
607 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000608
609 if (!(tuple = Py_BuildValue("(S)", string)))
610 return NULL;
611 v = regobj_search((regexobject *)cache_prog, tuple);
612 Py_DECREF(tuple);
613 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000614}
615
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000616static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000617regex_set_syntax(PyObject *self, PyObject *args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000618{
619 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000620 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000621 return NULL;
622 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000623 /* wipe the global pattern cache */
624 Py_XDECREF(cache_pat);
625 cache_pat = NULL;
626 Py_XDECREF(cache_prog);
627 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000628 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000629}
630
Barry Warsaw909d7c31997-02-18 18:48:50 +0000631static PyObject *
Peter Schneider-Kamp7d0c71a2000-07-10 13:05:29 +0000632regex_get_syntax(PyObject *self, PyObject *args)
Barry Warsaw909d7c31997-02-18 18:48:50 +0000633{
634 if (!PyArg_Parse(args, ""))
635 return NULL;
636 return PyInt_FromLong((long)re_syntax);
637}
638
639
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000640static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000641 {"compile", regex_compile, 1},
642 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000643 {"match", regex_match, 0},
644 {"search", regex_search, 0},
645 {"set_syntax", regex_set_syntax, 0},
Barry Warsaw909d7c31997-02-18 18:48:50 +0000646 {"get_syntax", regex_get_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000647 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000648};
649
Guido van Rossum3886bb61998-12-04 18:50:17 +0000650DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000651initregex(void)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000652{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000653 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000654 int i;
655 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000656
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000657 m = Py_InitModule("regex", regex_global_methods);
658 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000659
660 /* Initialize regex.error exception */
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000661 v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
Barry Warsawc3573251996-12-20 21:56:07 +0000662 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
663 goto finally;
664
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000665 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000666 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
667 goto finally;
668
669 if (!(s = PyString_AsString(v)))
670 goto finally;
671
672 for (i = 0; i < 256; i++) {
673 if (isupper(i))
674 s[i] = tolower(i);
675 else
676 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000677 }
Barry Warsawc3573251996-12-20 21:56:07 +0000678 if (PyDict_SetItemString(d, "casefold", v) < 0)
679 goto finally;
680 Py_DECREF(v);
681
682 if (!PyErr_Occurred())
683 return;
684 finally:
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000685 /* Nothing */ ;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000686}