blob: 9cbd4bf4acaedc49cf8948ec629d0a41edc9dd13 [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
6/***********************************************************
Guido van Rossum524b5881995-01-04 19:10:35 +00007Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
8The Netherlands.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00009
10 All Rights Reserved
11
12Permission to use, copy, modify, and distribute this software and its
13documentation for any purpose and without fee is hereby granted,
14provided that the above copyright notice appear in all copies and that
15both that copyright notice and this permission notice appear in
16supporting documentation, and that the names of Stichting Mathematisch
17Centrum or CWI not be used in advertising or publicity pertaining to
18distribution of the software without specific, written prior permission.
19
20STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
21THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
22FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
23FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
24WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
25ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
26OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
27
28******************************************************************/
29
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000033
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000034#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000035
Guido van Rossum1cab95c1992-01-19 16:31:57 +000036#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000037
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000038static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000039
40typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042 struct re_pattern_buffer re_patbuf; /* The compiled expression */
43 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000045 PyObject *re_translate; /* String object for translate table */
46 PyObject *re_lastok; /* String object last matched/searched */
47 PyObject *re_groupindex; /* Group name to index dictionary */
48 PyObject *re_givenpat; /* Pattern with symbolic groups */
49 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000050} regexobject;
51
52/* Regex object methods */
53
54static void
55reg_dealloc(re)
56 regexobject *re;
57{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000058 PyMem_XDEL(re->re_patbuf.buffer);
59 Py_XDECREF(re->re_translate);
60 Py_XDECREF(re->re_lastok);
61 Py_XDECREF(re->re_groupindex);
62 Py_XDECREF(re->re_givenpat);
63 Py_XDECREF(re->re_realpat);
64 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000065}
66
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000067static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000068makeresult(regs)
69 struct re_registers *regs;
70{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000071 PyObject *v = PyTuple_New(RE_NREGS);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000072 if (v != NULL) {
73 int i;
74 for (i = 0; i < RE_NREGS; i++) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000075 PyObject *w;
76 w = Py_BuildValue("(ii)", regs->start[i], regs->end[i]);
Guido van Rossum36d330b1993-02-21 20:12:16 +000077 if (w == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000078 Py_XDECREF(v);
Guido van Rossum36d330b1993-02-21 20:12:16 +000079 v = NULL;
80 break;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000081 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000082 PyTuple_SetItem(v, i, w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000083 }
84 }
85 return v;
86}
87
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000088static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000089reg_match(re, args)
90 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000091 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000092{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000093 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000094 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000095 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000096 int offset;
97 int result;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000098 if (PyArg_Parse(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000099 offset = 0;
100 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000101 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000102 PyErr_Clear();
103 if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000104 return NULL;
105 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000106 buffer = PyString_AsString(argstring);
107 size = PyString_Size(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000108 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000109 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000110 return NULL;
111 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000112 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000113 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000114 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000115 if (result < -1) {
116 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000117 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000118 return NULL;
119 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000120 if (result >= 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000121 Py_INCREF(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000122 re->re_lastok = argstring;
123 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000124 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000125}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000126
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000127static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000128reg_search(re, args)
129 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000130 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000131{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000132 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000133 char *buffer;
134 int size;
135 int offset;
136 int range;
137 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000138
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000139 if (PyArg_Parse(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000140 offset = 0;
141 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000142 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000143 PyErr_Clear();
144 if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000145 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000146 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000147 buffer = PyString_AsString(argstring);
148 size = PyString_Size(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000149 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000150 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000151 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000152 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000153 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
154 the implementation don't match: the documentation states that
155 |range| positions are tried, while the code tries |range|+1
156 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000157 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000158 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000159 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000160 result = re_search(&re->re_patbuf, buffer, size, offset, range,
161 &re->re_regs);
162 if (result < -1) {
163 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000164 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000165 return NULL;
166 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000167 if (result >= 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000168 Py_INCREF(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000169 re->re_lastok = argstring;
170 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000171 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000172}
173
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000174static PyObject *
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000175reg_group(re, args)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000176 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000177 PyObject *args;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000178{
179 int i, a, b;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000180 if (args != NULL && PyTuple_Check(args)) {
181 int n = PyTuple_Size(args);
182 PyObject *res = PyTuple_New(n);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000183 if (res == NULL)
184 return NULL;
185 for (i = 0; i < n; i++) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000186 PyObject *v = reg_group(re, PyTuple_GetItem(args, i));
Guido van Rossum36d330b1993-02-21 20:12:16 +0000187 if (v == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000188 Py_DECREF(res);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000189 return NULL;
190 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000191 PyTuple_SetItem(res, i, v);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000192 }
193 return res;
194 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000195 if (!PyArg_Parse(args, "i", &i)) {
196 PyObject *n;
197 PyErr_Clear();
198 if (!PyArg_Parse(args, "S", &n))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000199 return NULL;
200 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000201 PyObject *index;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000202 if (re->re_groupindex == NULL)
203 index = NULL;
204 else
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000205 index = PyDict_GetItem(re->re_groupindex, n);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000206 if (index == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000207 PyErr_SetString(RegexError, "group() group name doesn't exist");
Guido van Rossumb6775db1994-08-01 11:34:53 +0000208 return NULL;
209 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000210 i = PyInt_AsLong(index);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000211 }
212 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000213 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000214 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000215 return NULL;
216 }
217 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000218 PyErr_SetString(RegexError,
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000219 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000220 return NULL;
221 }
222 a = re->re_regs.start[i];
223 b = re->re_regs.end[i];
224 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000225 Py_INCREF(Py_None);
226 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000227 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000228 return PyString_FromStringAndSize(PyString_AsString(re->re_lastok)+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000229}
230
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000231static struct PyMethodDef reg_methods[] = {
232 {"match", (PyCFunction)reg_match},
233 {"search", (PyCFunction)reg_search},
234 {"group", (PyCFunction)reg_group},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000235 {NULL, NULL} /* sentinel */
236};
237
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000238static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000239reg_getattr(re, name)
240 regexobject *re;
241 char *name;
242{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000243 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000244 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000245 Py_INCREF(Py_None);
246 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000247 }
248 return makeresult(&re->re_regs);
249 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000250 if (strcmp(name, "last") == 0) {
251 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000252 Py_INCREF(Py_None);
253 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000254 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000255 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000256 return re->re_lastok;
257 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000258 if (strcmp(name, "translate") == 0) {
259 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000260 Py_INCREF(Py_None);
261 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000262 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000263 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000264 return re->re_translate;
265 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000266 if (strcmp(name, "groupindex") == 0) {
267 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000268 Py_INCREF(Py_None);
269 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000270 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000271 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000272 return re->re_groupindex;
273 }
274 if (strcmp(name, "realpat") == 0) {
275 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000276 Py_INCREF(Py_None);
277 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000278 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000279 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000280 return re->re_realpat;
281 }
282 if (strcmp(name, "givenpat") == 0) {
283 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000284 Py_INCREF(Py_None);
285 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000286 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000287 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000288 return re->re_givenpat;
289 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000290 if (strcmp(name, "__members__") == 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000291 PyObject *list = PyList_New(6);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000292 if (list) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000293 PyList_SetItem(list, 0, PyString_FromString("last"));
294 PyList_SetItem(list, 1, PyString_FromString("regs"));
295 PyList_SetItem(list, 2, PyString_FromString("translate"));
296 PyList_SetItem(list, 3, PyString_FromString("groupindex"));
297 PyList_SetItem(list, 4, PyString_FromString("realpat"));
298 PyList_SetItem(list, 5, PyString_FromString("givenpat"));
299 if (PyErr_Occurred()) {
300 Py_DECREF(list);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000301 list = NULL;
302 }
303 }
304 return list;
305 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000306 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000307}
308
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000309static PyTypeObject Regextype = {
310 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000311 0, /*ob_size*/
312 "regex", /*tp_name*/
313 sizeof(regexobject), /*tp_size*/
314 0, /*tp_itemsize*/
315 /* methods */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000316 (destructor)reg_dealloc, /*tp_dealloc*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000317 0, /*tp_print*/
Guido van Rossumb6775db1994-08-01 11:34:53 +0000318 (getattrfunc)reg_getattr, /*tp_getattr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000319 0, /*tp_setattr*/
320 0, /*tp_compare*/
321 0, /*tp_repr*/
322};
323
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000324static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000325newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000326 PyObject *pattern;
327 PyObject *translate;
328 PyObject *givenpat;
329 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000330{
331 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000332 char *pat = PyString_AsString(pattern);
333 int size = PyString_Size(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000334
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000335 if (translate != NULL && PyString_Size(translate) != 256) {
336 PyErr_SetString(RegexError,
Guido van Rossum36d330b1993-02-21 20:12:16 +0000337 "translation table must be 256 bytes");
338 return NULL;
339 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000340 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000341 if (re != NULL) {
342 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000343 re->re_patbuf.buffer = NULL;
344 re->re_patbuf.allocated = 0;
345 re->re_patbuf.fastmap = re->re_fastmap;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000346 if (translate)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000347 re->re_patbuf.translate = PyString_AsString(translate);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000348 else
349 re->re_patbuf.translate = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000350 Py_XINCREF(translate);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000351 re->re_translate = translate;
352 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000353 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000354 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000355 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000356 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000357 re->re_givenpat = givenpat;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000358 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000359 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000360 PyErr_SetString(RegexError, error);
361 Py_DECREF(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000362 re = NULL;
363 }
364 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000365 return (PyObject *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000366}
367
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000368static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000369regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000370 PyObject *self;
371 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000372{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000373 PyObject *pat = NULL;
374 PyObject *tran = NULL;
375 if (!PyArg_Parse(args, "S", &pat)) {
376 PyErr_Clear();
377 if (!PyArg_Parse(args, "(SS)", &pat, &tran))
Guido van Rossum36d330b1993-02-21 20:12:16 +0000378 return NULL;
379 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000380 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000381}
382
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000383static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000384symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000385 PyObject *pattern;
386 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000387{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000388 char *opat = PyString_AsString(pattern);
389 char *oend = opat + PyString_Size(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000390 int group_count = 0;
391 int escaped = 0;
392 char *o = opat;
393 char *n;
394 char name_buf[128];
395 char *g;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000396 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000397 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
398
Guido van Rossumab28c561996-06-11 18:33:14 +0000399 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000400 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000401 return pattern;
402 }
403
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000404 npattern = PyString_FromStringAndSize((char*)NULL, PyString_Size(pattern));
Guido van Rossumb6775db1994-08-01 11:34:53 +0000405 if (npattern == NULL)
406 return NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000407 n = PyString_AsString(npattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000408
409 while (o < oend) {
410 if (*o == '(' && escaped == require_escape) {
411 char *backtrack;
412 escaped = 0;
413 ++group_count;
414 *n++ = *o;
415 if (++o >= oend || *o != '<')
416 continue;
417 /* *o == '<' */
418 if (o+1 < oend && *(o+1) == '>')
419 continue;
420 backtrack = o;
421 g = name_buf;
422 for (++o; o < oend;) {
423 if (*o == '>') {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000424 PyObject *group_name = NULL;
425 PyObject *group_index = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000426 *g++ = '\0';
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000427 group_name = PyString_FromString(name_buf);
428 group_index = PyInt_FromLong(group_count);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000429 if (group_name == NULL || group_index == NULL
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000430 || PyDict_SetItem(gdict, group_name, group_index) != 0) {
431 Py_XDECREF(group_name);
432 Py_XDECREF(group_index);
433 Py_XDECREF(npattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000434 return NULL;
435 }
436 ++o; /* eat the '>' */
437 break;
438 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000439 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000440 o = backtrack;
441 break;
442 }
443 *g++ = *o++;
444 }
445 }
446 if (*o == '[' && !escaped) {
447 *n++ = *o;
448 ++o; /* eat the char following '[' */
449 *n++ = *o;
450 while (o < oend && *o != ']') {
451 ++o;
452 *n++ = *o;
453 }
454 if (o < oend)
455 ++o;
456 }
457 else if (*o == '\\') {
458 escaped = 1;
459 *n++ = *o;
460 ++o;
461 }
462 else {
463 escaped = 0;
464 *n++ = *o;
465 ++o;
466 }
467 }
468
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000469 if (_PyString_Resize(&npattern, n - PyString_AsString(npattern)) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000470 return npattern;
471 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000472 return NULL;
473 }
474
475}
476
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000477static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000478regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000479 PyObject *self;
480 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000481{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000482 PyObject *pattern;
483 PyObject *tran = NULL;
484 PyObject *gdict = NULL;
485 PyObject *npattern;
486 if (!PyArg_Parse(args, "S", &pattern)) {
487 PyErr_Clear();
488 if (!PyArg_Parse(args, "(SS)", &pattern, &tran))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000489 return NULL;
490 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000491 gdict = PyDict_New();
Guido van Rossumb6775db1994-08-01 11:34:53 +0000492 if (gdict == NULL
493 || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000494 Py_DECREF(gdict);
495 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000496 return NULL;
497 }
498 return newregexobject(npattern, tran, pattern, gdict);
499}
500
501
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000502static PyObject *cache_pat;
503static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000504
505static int
506update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000507 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000508{
509 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000510 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000511 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000512 Py_XDECREF(cache_prog);
513 cache_prog = regex_compile((PyObject *)NULL, pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000514 if (cache_prog == NULL)
515 return -1;
516 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000517 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000518 }
519 return 0;
520}
521
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000522static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000523regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000524 PyObject *self;
525 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000526{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000527 PyObject *pat, *string;
528 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000529 return NULL;
530 if (update_cache(pat) < 0)
531 return NULL;
532 return reg_match((regexobject *)cache_prog, string);
533}
534
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000535static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000536regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000537 PyObject *self;
538 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000539{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000540 PyObject *pat, *string;
541 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000542 return NULL;
543 if (update_cache(pat) < 0)
544 return NULL;
545 return reg_search((regexobject *)cache_prog, string);
546}
547
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000548static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000549regex_set_syntax(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000550 PyObject *self, *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000551{
552 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000553 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000554 return NULL;
555 syntax = re_set_syntax(syntax);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000556 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000557}
558
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000559static struct PyMethodDef regex_global_methods[] = {
Guido van Rossum295d1711995-02-19 15:55:19 +0000560 {"compile", regex_compile, 0},
561 {"symcomp", regex_symcomp, 0},
562 {"match", regex_match, 0},
563 {"search", regex_search, 0},
564 {"set_syntax", regex_set_syntax, 0},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000565 {NULL, NULL} /* sentinel */
566};
567
568initregex()
569{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000570 PyObject *m, *d, *v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000571
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000572 m = Py_InitModule("regex", regex_global_methods);
573 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000574
575 /* Initialize regex.error exception */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000576 RegexError = PyString_FromString("regex.error");
577 if (RegexError == NULL || PyDict_SetItemString(d, "error", RegexError) != 0)
578 Py_FatalError("can't define regex.error");
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000579
580 /* Initialize regex.casefold constant */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000581 v = PyString_FromStringAndSize((char *)NULL, 256);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000582 if (v != NULL) {
583 int i;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000584 char *s = PyString_AsString(v);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000585 for (i = 0; i < 256; i++) {
586 if (isupper(i))
587 s[i] = tolower(i);
588 else
589 s[i] = i;
590 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000591 PyDict_SetItemString(d, "casefold", v);
592 Py_DECREF(v);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000593 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000594}