blob: 6f566c31f1478ab97fc64b61802aae4ad8529189 [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
6/***********************************************************
Guido van Rossum524b5881995-01-04 19:10:35 +00007Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
8The Netherlands.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00009
10 All Rights Reserved
11
12Permission to use, copy, modify, and distribute this software and its
13documentation for any purpose and without fee is hereby granted,
14provided that the above copyright notice appear in all copies and that
15both that copyright notice and this permission notice appear in
16supporting documentation, and that the names of Stichting Mathematisch
17Centrum or CWI not be used in advertising or publicity pertaining to
18distribution of the software without specific, written prior permission.
19
20STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
21THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
22FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
23FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
24WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
25ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
26OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
27
28******************************************************************/
29
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000033
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000034#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000035
Guido van Rossum1cab95c1992-01-19 16:31:57 +000036#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000037
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000038static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000039
40typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042 struct re_pattern_buffer re_patbuf; /* The compiled expression */
43 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000045 PyObject *re_translate; /* String object for translate table */
46 PyObject *re_lastok; /* String object last matched/searched */
47 PyObject *re_groupindex; /* Group name to index dictionary */
48 PyObject *re_givenpat; /* Pattern with symbolic groups */
49 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000050} regexobject;
51
52/* Regex object methods */
53
54static void
55reg_dealloc(re)
56 regexobject *re;
57{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000058 PyMem_XDEL(re->re_patbuf.buffer);
59 Py_XDECREF(re->re_translate);
60 Py_XDECREF(re->re_lastok);
61 Py_XDECREF(re->re_groupindex);
62 Py_XDECREF(re->re_givenpat);
63 Py_XDECREF(re->re_realpat);
64 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000065}
66
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000067static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000068makeresult(regs)
69 struct re_registers *regs;
70{
Guido van Rossumc1962021996-10-08 14:18:42 +000071 PyObject *v;
72 int i;
73 static PyObject *filler = NULL;
74 if (filler == NULL) {
75 filler = Py_BuildValue("(ii)", -1, -1);
76 if (filler == NULL)
77 return NULL;
78 }
79 v = PyTuple_New(RE_NREGS);
80 if (v == NULL)
81 return NULL;
82 for (i = 0; i < RE_NREGS; i++) {
83 int lo = regs->start[i];
84 int hi = regs->end[i];
85 PyObject *w;
86 if (lo == -1 && hi == -1) {
87 w = filler;
88 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000089 }
Guido van Rossumc1962021996-10-08 14:18:42 +000090 else
91 w = Py_BuildValue("(ii)", lo, hi);
92 if (w == NULL) {
93 Py_XDECREF(v);
94 return NULL;
95 }
96 PyTuple_SetItem(v, i, w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000097 }
98 return v;
99}
100
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000101static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000102reg_match(re, args)
103 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000104 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000105{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000106 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000107 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000108 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000109 int offset;
110 int result;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000111 if (PyArg_Parse(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000112 offset = 0;
113 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000114 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000115 PyErr_Clear();
116 if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000117 return NULL;
118 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000119 buffer = PyString_AsString(argstring);
120 size = PyString_Size(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000121 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000122 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000123 return NULL;
124 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000125 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000126 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000127 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000128 if (result < -1) {
129 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000130 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000131 return NULL;
132 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000133 if (result >= 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000134 Py_INCREF(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000135 re->re_lastok = argstring;
136 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000137 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000138}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000139
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000140static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000141reg_search(re, args)
142 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000143 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000144{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000145 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000146 char *buffer;
147 int size;
148 int offset;
149 int range;
150 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000151
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000152 if (PyArg_Parse(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000153 offset = 0;
154 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000155 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000156 PyErr_Clear();
157 if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000158 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000159 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000160 buffer = PyString_AsString(argstring);
161 size = PyString_Size(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000162 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000163 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000164 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000165 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000166 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
167 the implementation don't match: the documentation states that
168 |range| positions are tried, while the code tries |range|+1
169 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000170 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000171 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000172 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000173 result = re_search(&re->re_patbuf, buffer, size, offset, range,
174 &re->re_regs);
175 if (result < -1) {
176 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000177 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000178 return NULL;
179 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000180 if (result >= 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000181 Py_INCREF(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000182 re->re_lastok = argstring;
183 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000184 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000185}
186
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000187static PyObject *
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000188reg_group(re, args)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000189 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000190 PyObject *args;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000191{
192 int i, a, b;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000193 if (args != NULL && PyTuple_Check(args)) {
194 int n = PyTuple_Size(args);
195 PyObject *res = PyTuple_New(n);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000196 if (res == NULL)
197 return NULL;
198 for (i = 0; i < n; i++) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000199 PyObject *v = reg_group(re, PyTuple_GetItem(args, i));
Guido van Rossum36d330b1993-02-21 20:12:16 +0000200 if (v == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000201 Py_DECREF(res);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000202 return NULL;
203 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000204 PyTuple_SetItem(res, i, v);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000205 }
206 return res;
207 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000208 if (!PyArg_Parse(args, "i", &i)) {
209 PyObject *n;
210 PyErr_Clear();
211 if (!PyArg_Parse(args, "S", &n))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000212 return NULL;
213 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000214 PyObject *index;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000215 if (re->re_groupindex == NULL)
216 index = NULL;
217 else
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000218 index = PyDict_GetItem(re->re_groupindex, n);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000219 if (index == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000220 PyErr_SetString(RegexError, "group() group name doesn't exist");
Guido van Rossumb6775db1994-08-01 11:34:53 +0000221 return NULL;
222 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000223 i = PyInt_AsLong(index);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000224 }
225 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000226 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000227 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000228 return NULL;
229 }
230 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000231 PyErr_SetString(RegexError,
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000232 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000233 return NULL;
234 }
235 a = re->re_regs.start[i];
236 b = re->re_regs.end[i];
237 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000238 Py_INCREF(Py_None);
239 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000240 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000241 return PyString_FromStringAndSize(PyString_AsString(re->re_lastok)+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000242}
243
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000244static struct PyMethodDef reg_methods[] = {
245 {"match", (PyCFunction)reg_match},
246 {"search", (PyCFunction)reg_search},
247 {"group", (PyCFunction)reg_group},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000248 {NULL, NULL} /* sentinel */
249};
250
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000251static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000252reg_getattr(re, name)
253 regexobject *re;
254 char *name;
255{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000256 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000257 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000258 Py_INCREF(Py_None);
259 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000260 }
261 return makeresult(&re->re_regs);
262 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000263 if (strcmp(name, "last") == 0) {
264 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000265 Py_INCREF(Py_None);
266 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000267 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000268 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000269 return re->re_lastok;
270 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000271 if (strcmp(name, "translate") == 0) {
272 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000273 Py_INCREF(Py_None);
274 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000275 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000276 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000277 return re->re_translate;
278 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000279 if (strcmp(name, "groupindex") == 0) {
280 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000281 Py_INCREF(Py_None);
282 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000283 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000284 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000285 return re->re_groupindex;
286 }
287 if (strcmp(name, "realpat") == 0) {
288 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000289 Py_INCREF(Py_None);
290 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000291 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000292 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000293 return re->re_realpat;
294 }
295 if (strcmp(name, "givenpat") == 0) {
296 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000297 Py_INCREF(Py_None);
298 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000299 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000300 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000301 return re->re_givenpat;
302 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000303 if (strcmp(name, "__members__") == 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000304 PyObject *list = PyList_New(6);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000305 if (list) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000306 PyList_SetItem(list, 0, PyString_FromString("last"));
307 PyList_SetItem(list, 1, PyString_FromString("regs"));
308 PyList_SetItem(list, 2, PyString_FromString("translate"));
309 PyList_SetItem(list, 3, PyString_FromString("groupindex"));
310 PyList_SetItem(list, 4, PyString_FromString("realpat"));
311 PyList_SetItem(list, 5, PyString_FromString("givenpat"));
312 if (PyErr_Occurred()) {
313 Py_DECREF(list);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000314 list = NULL;
315 }
316 }
317 return list;
318 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000319 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000320}
321
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000322static PyTypeObject Regextype = {
323 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000324 0, /*ob_size*/
325 "regex", /*tp_name*/
326 sizeof(regexobject), /*tp_size*/
327 0, /*tp_itemsize*/
328 /* methods */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000329 (destructor)reg_dealloc, /*tp_dealloc*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000330 0, /*tp_print*/
Guido van Rossumb6775db1994-08-01 11:34:53 +0000331 (getattrfunc)reg_getattr, /*tp_getattr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000332 0, /*tp_setattr*/
333 0, /*tp_compare*/
334 0, /*tp_repr*/
335};
336
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000337static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000338newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000339 PyObject *pattern;
340 PyObject *translate;
341 PyObject *givenpat;
342 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000343{
344 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000345 char *pat = PyString_AsString(pattern);
346 int size = PyString_Size(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000347
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000348 if (translate != NULL && PyString_Size(translate) != 256) {
349 PyErr_SetString(RegexError,
Guido van Rossum36d330b1993-02-21 20:12:16 +0000350 "translation table must be 256 bytes");
351 return NULL;
352 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000353 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000354 if (re != NULL) {
355 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000356 re->re_patbuf.buffer = NULL;
357 re->re_patbuf.allocated = 0;
358 re->re_patbuf.fastmap = re->re_fastmap;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000359 if (translate)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000360 re->re_patbuf.translate = PyString_AsString(translate);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000361 else
362 re->re_patbuf.translate = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000363 Py_XINCREF(translate);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000364 re->re_translate = translate;
365 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000366 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000367 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000368 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000369 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000370 re->re_givenpat = givenpat;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000371 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000372 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000373 PyErr_SetString(RegexError, error);
374 Py_DECREF(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000375 re = NULL;
376 }
377 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000378 return (PyObject *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000379}
380
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000381static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000382regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000383 PyObject *self;
384 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000385{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000386 PyObject *pat = NULL;
387 PyObject *tran = NULL;
388 if (!PyArg_Parse(args, "S", &pat)) {
389 PyErr_Clear();
390 if (!PyArg_Parse(args, "(SS)", &pat, &tran))
Guido van Rossum36d330b1993-02-21 20:12:16 +0000391 return NULL;
392 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000393 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000394}
395
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000396static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000397symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000398 PyObject *pattern;
399 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000400{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000401 char *opat = PyString_AsString(pattern);
402 char *oend = opat + PyString_Size(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000403 int group_count = 0;
404 int escaped = 0;
405 char *o = opat;
406 char *n;
407 char name_buf[128];
408 char *g;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000409 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000410 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
411
Guido van Rossumab28c561996-06-11 18:33:14 +0000412 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000413 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000414 return pattern;
415 }
416
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000417 npattern = PyString_FromStringAndSize((char*)NULL, PyString_Size(pattern));
Guido van Rossumb6775db1994-08-01 11:34:53 +0000418 if (npattern == NULL)
419 return NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000420 n = PyString_AsString(npattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000421
422 while (o < oend) {
423 if (*o == '(' && escaped == require_escape) {
424 char *backtrack;
425 escaped = 0;
426 ++group_count;
427 *n++ = *o;
428 if (++o >= oend || *o != '<')
429 continue;
430 /* *o == '<' */
431 if (o+1 < oend && *(o+1) == '>')
432 continue;
433 backtrack = o;
434 g = name_buf;
435 for (++o; o < oend;) {
436 if (*o == '>') {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000437 PyObject *group_name = NULL;
438 PyObject *group_index = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000439 *g++ = '\0';
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000440 group_name = PyString_FromString(name_buf);
441 group_index = PyInt_FromLong(group_count);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000442 if (group_name == NULL || group_index == NULL
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000443 || PyDict_SetItem(gdict, group_name, group_index) != 0) {
444 Py_XDECREF(group_name);
445 Py_XDECREF(group_index);
446 Py_XDECREF(npattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000447 return NULL;
448 }
449 ++o; /* eat the '>' */
450 break;
451 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000452 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000453 o = backtrack;
454 break;
455 }
456 *g++ = *o++;
457 }
458 }
459 if (*o == '[' && !escaped) {
460 *n++ = *o;
461 ++o; /* eat the char following '[' */
462 *n++ = *o;
463 while (o < oend && *o != ']') {
464 ++o;
465 *n++ = *o;
466 }
467 if (o < oend)
468 ++o;
469 }
470 else if (*o == '\\') {
471 escaped = 1;
472 *n++ = *o;
473 ++o;
474 }
475 else {
476 escaped = 0;
477 *n++ = *o;
478 ++o;
479 }
480 }
481
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000482 if (_PyString_Resize(&npattern, n - PyString_AsString(npattern)) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000483 return npattern;
484 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000485 return NULL;
486 }
487
488}
489
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000490static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000491regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000492 PyObject *self;
493 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000494{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000495 PyObject *pattern;
496 PyObject *tran = NULL;
497 PyObject *gdict = NULL;
498 PyObject *npattern;
499 if (!PyArg_Parse(args, "S", &pattern)) {
500 PyErr_Clear();
501 if (!PyArg_Parse(args, "(SS)", &pattern, &tran))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000502 return NULL;
503 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000504 gdict = PyDict_New();
Guido van Rossumb6775db1994-08-01 11:34:53 +0000505 if (gdict == NULL
506 || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000507 Py_DECREF(gdict);
508 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000509 return NULL;
510 }
511 return newregexobject(npattern, tran, pattern, gdict);
512}
513
514
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000515static PyObject *cache_pat;
516static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000517
518static int
519update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000520 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000521{
522 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000523 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000524 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000525 Py_XDECREF(cache_prog);
526 cache_prog = regex_compile((PyObject *)NULL, pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000527 if (cache_prog == NULL)
528 return -1;
529 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000530 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000531 }
532 return 0;
533}
534
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000535static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000536regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000537 PyObject *self;
538 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000539{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000540 PyObject *pat, *string;
541 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000542 return NULL;
543 if (update_cache(pat) < 0)
544 return NULL;
545 return reg_match((regexobject *)cache_prog, string);
546}
547
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000548static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000549regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000550 PyObject *self;
551 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000552{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000553 PyObject *pat, *string;
554 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000555 return NULL;
556 if (update_cache(pat) < 0)
557 return NULL;
558 return reg_search((regexobject *)cache_prog, string);
559}
560
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000561static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000562regex_set_syntax(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000563 PyObject *self, *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000564{
565 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000566 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000567 return NULL;
568 syntax = re_set_syntax(syntax);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000569 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000570}
571
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000572static struct PyMethodDef regex_global_methods[] = {
Guido van Rossum295d1711995-02-19 15:55:19 +0000573 {"compile", regex_compile, 0},
574 {"symcomp", regex_symcomp, 0},
575 {"match", regex_match, 0},
576 {"search", regex_search, 0},
577 {"set_syntax", regex_set_syntax, 0},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000578 {NULL, NULL} /* sentinel */
579};
580
Guido van Rossum8f3032d1996-08-19 22:03:12 +0000581void
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000582initregex()
583{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000584 PyObject *m, *d, *v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000585
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000586 m = Py_InitModule("regex", regex_global_methods);
587 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000588
589 /* Initialize regex.error exception */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000590 RegexError = PyString_FromString("regex.error");
591 if (RegexError == NULL || PyDict_SetItemString(d, "error", RegexError) != 0)
592 Py_FatalError("can't define regex.error");
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000593
594 /* Initialize regex.casefold constant */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000595 v = PyString_FromStringAndSize((char *)NULL, 256);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000596 if (v != NULL) {
597 int i;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000598 char *s = PyString_AsString(v);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000599 for (i = 0; i < 256; i++) {
600 if (isupper(i))
601 s[i] = tolower(i);
602 else
603 s[i] = i;
604 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000605 PyDict_SetItemString(d, "casefold", v);
606 Py_DECREF(v);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000607 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000608}