blob: 342d77141f5fa58dea15f45dc6ae70ca45b40e6b [file] [log] [blame]
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00001/*
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00002XXX support range parameter on search
3XXX support mstop parameter on search
4*/
5
6/***********************************************************
Guido van Rossum524b5881995-01-04 19:10:35 +00007Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
8The Netherlands.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00009
10 All Rights Reserved
11
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Permission to use, copy, modify, and distribute this software and its
13documentation for any purpose and without fee is hereby granted,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000014provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000015both that copyright notice and this permission notice appear in
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000016supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000017Centrum or CWI or Corporation for National Research Initiatives or
18CNRI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior
20permission.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021
Guido van Rossumd266eb41996-10-25 14:44:06 +000022While CWI is the initial source for this software, a modified version
23is made available by the Corporation for National Research Initiatives
24(CNRI) at the Internet address ftp://ftp.python.org.
25
26STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
27REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
28MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
29CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
30DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
31PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
32TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
33PERFORMANCE OF THIS SOFTWARE.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35******************************************************************/
36
37/* Regular expression objects */
Guido van Rossum1cab95c1992-01-19 16:31:57 +000038/* This uses Tatu Ylonen's copyleft-free reimplementation of
39 GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000040
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042
Guido van Rossum1cab95c1992-01-19 16:31:57 +000043#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000044
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000045static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000046
47typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000048 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000049 struct re_pattern_buffer re_patbuf; /* The compiled expression */
50 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000051 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000052 PyObject *re_translate; /* String object for translate table */
53 PyObject *re_lastok; /* String object last matched/searched */
54 PyObject *re_groupindex; /* Group name to index dictionary */
55 PyObject *re_givenpat; /* Pattern with symbolic groups */
56 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000057} regexobject;
58
59/* Regex object methods */
60
61static void
62reg_dealloc(re)
63 regexobject *re;
64{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000065 PyMem_XDEL(re->re_patbuf.buffer);
66 Py_XDECREF(re->re_translate);
67 Py_XDECREF(re->re_lastok);
68 Py_XDECREF(re->re_groupindex);
69 Py_XDECREF(re->re_givenpat);
70 Py_XDECREF(re->re_realpat);
71 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000072}
73
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000074static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000075makeresult(regs)
76 struct re_registers *regs;
77{
Guido van Rossumc1962021996-10-08 14:18:42 +000078 PyObject *v;
79 int i;
80 static PyObject *filler = NULL;
81 if (filler == NULL) {
82 filler = Py_BuildValue("(ii)", -1, -1);
83 if (filler == NULL)
84 return NULL;
85 }
86 v = PyTuple_New(RE_NREGS);
87 if (v == NULL)
88 return NULL;
89 for (i = 0; i < RE_NREGS; i++) {
90 int lo = regs->start[i];
91 int hi = regs->end[i];
92 PyObject *w;
93 if (lo == -1 && hi == -1) {
94 w = filler;
95 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000096 }
Guido van Rossumc1962021996-10-08 14:18:42 +000097 else
98 w = Py_BuildValue("(ii)", lo, hi);
99 if (w == NULL) {
100 Py_XDECREF(v);
101 return NULL;
102 }
103 PyTuple_SetItem(v, i, w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000104 }
105 return v;
106}
107
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000108static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000109reg_match(re, args)
110 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000111 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000112{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000113 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000114 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000115 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000116 int offset;
117 int result;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000118 if (PyArg_Parse(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000119 offset = 0;
120 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000121 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000122 PyErr_Clear();
123 if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000124 return NULL;
125 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000126 buffer = PyString_AsString(argstring);
127 size = PyString_Size(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000128 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000129 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000130 return NULL;
131 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000132 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000133 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000134 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000135 if (result < -1) {
136 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000137 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000138 return NULL;
139 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000140 if (result >= 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000141 Py_INCREF(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000142 re->re_lastok = argstring;
143 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000144 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000145}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000146
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000147static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000148reg_search(re, args)
149 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000150 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000151{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000152 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000153 char *buffer;
154 int size;
155 int offset;
156 int range;
157 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000158
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000159 if (PyArg_Parse(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000160 offset = 0;
161 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000162 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000163 PyErr_Clear();
164 if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000165 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000166 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000167 buffer = PyString_AsString(argstring);
168 size = PyString_Size(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000169 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000170 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000171 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000172 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000173 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
174 the implementation don't match: the documentation states that
175 |range| positions are tried, while the code tries |range|+1
176 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000177 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000178 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000179 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000180 result = re_search(&re->re_patbuf, buffer, size, offset, range,
181 &re->re_regs);
182 if (result < -1) {
183 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000184 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000185 return NULL;
186 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000187 if (result >= 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000188 Py_INCREF(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000189 re->re_lastok = argstring;
190 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000191 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000192}
193
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000194static PyObject *
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000195reg_group(re, args)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000196 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000197 PyObject *args;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000198{
199 int i, a, b;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000200 if (args != NULL && PyTuple_Check(args)) {
201 int n = PyTuple_Size(args);
202 PyObject *res = PyTuple_New(n);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000203 if (res == NULL)
204 return NULL;
205 for (i = 0; i < n; i++) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000206 PyObject *v = reg_group(re, PyTuple_GetItem(args, i));
Guido van Rossum36d330b1993-02-21 20:12:16 +0000207 if (v == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000208 Py_DECREF(res);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000209 return NULL;
210 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000211 PyTuple_SetItem(res, i, v);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000212 }
213 return res;
214 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000215 if (!PyArg_Parse(args, "i", &i)) {
216 PyObject *n;
217 PyErr_Clear();
218 if (!PyArg_Parse(args, "S", &n))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000219 return NULL;
220 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000221 PyObject *index;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000222 if (re->re_groupindex == NULL)
223 index = NULL;
224 else
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000225 index = PyDict_GetItem(re->re_groupindex, n);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000226 if (index == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000227 PyErr_SetString(RegexError, "group() group name doesn't exist");
Guido van Rossumb6775db1994-08-01 11:34:53 +0000228 return NULL;
229 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000230 i = PyInt_AsLong(index);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000231 }
232 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000233 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000234 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000235 return NULL;
236 }
237 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000238 PyErr_SetString(RegexError,
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000239 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000240 return NULL;
241 }
242 a = re->re_regs.start[i];
243 b = re->re_regs.end[i];
244 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000245 Py_INCREF(Py_None);
246 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000247 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000248 return PyString_FromStringAndSize(PyString_AsString(re->re_lastok)+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000249}
250
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000251static struct PyMethodDef reg_methods[] = {
252 {"match", (PyCFunction)reg_match},
253 {"search", (PyCFunction)reg_search},
254 {"group", (PyCFunction)reg_group},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000255 {NULL, NULL} /* sentinel */
256};
257
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000258static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000259reg_getattr(re, name)
260 regexobject *re;
261 char *name;
262{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000263 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000264 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000265 Py_INCREF(Py_None);
266 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000267 }
268 return makeresult(&re->re_regs);
269 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000270 if (strcmp(name, "last") == 0) {
271 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000272 Py_INCREF(Py_None);
273 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000274 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000275 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000276 return re->re_lastok;
277 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000278 if (strcmp(name, "translate") == 0) {
279 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000280 Py_INCREF(Py_None);
281 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000282 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000283 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000284 return re->re_translate;
285 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000286 if (strcmp(name, "groupindex") == 0) {
287 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000288 Py_INCREF(Py_None);
289 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000290 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000291 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000292 return re->re_groupindex;
293 }
294 if (strcmp(name, "realpat") == 0) {
295 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000296 Py_INCREF(Py_None);
297 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000298 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000299 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000300 return re->re_realpat;
301 }
302 if (strcmp(name, "givenpat") == 0) {
303 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000304 Py_INCREF(Py_None);
305 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000306 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000307 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000308 return re->re_givenpat;
309 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000310 if (strcmp(name, "__members__") == 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000311 PyObject *list = PyList_New(6);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000312 if (list) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000313 PyList_SetItem(list, 0, PyString_FromString("last"));
314 PyList_SetItem(list, 1, PyString_FromString("regs"));
315 PyList_SetItem(list, 2, PyString_FromString("translate"));
316 PyList_SetItem(list, 3, PyString_FromString("groupindex"));
317 PyList_SetItem(list, 4, PyString_FromString("realpat"));
318 PyList_SetItem(list, 5, PyString_FromString("givenpat"));
319 if (PyErr_Occurred()) {
320 Py_DECREF(list);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000321 list = NULL;
322 }
323 }
324 return list;
325 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000326 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000327}
328
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000329static PyTypeObject Regextype = {
330 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000331 0, /*ob_size*/
332 "regex", /*tp_name*/
333 sizeof(regexobject), /*tp_size*/
334 0, /*tp_itemsize*/
335 /* methods */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000336 (destructor)reg_dealloc, /*tp_dealloc*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000337 0, /*tp_print*/
Guido van Rossumb6775db1994-08-01 11:34:53 +0000338 (getattrfunc)reg_getattr, /*tp_getattr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000339 0, /*tp_setattr*/
340 0, /*tp_compare*/
341 0, /*tp_repr*/
342};
343
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000344static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000345newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000346 PyObject *pattern;
347 PyObject *translate;
348 PyObject *givenpat;
349 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000350{
351 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000352 char *pat = PyString_AsString(pattern);
353 int size = PyString_Size(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000354
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000355 if (translate != NULL && PyString_Size(translate) != 256) {
356 PyErr_SetString(RegexError,
Guido van Rossum36d330b1993-02-21 20:12:16 +0000357 "translation table must be 256 bytes");
358 return NULL;
359 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000360 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000361 if (re != NULL) {
362 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000363 re->re_patbuf.buffer = NULL;
364 re->re_patbuf.allocated = 0;
365 re->re_patbuf.fastmap = re->re_fastmap;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000366 if (translate)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000367 re->re_patbuf.translate = PyString_AsString(translate);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000368 else
369 re->re_patbuf.translate = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000370 Py_XINCREF(translate);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000371 re->re_translate = translate;
372 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000373 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000374 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000375 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000376 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000377 re->re_givenpat = givenpat;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000378 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000379 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000380 PyErr_SetString(RegexError, error);
381 Py_DECREF(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000382 re = NULL;
383 }
384 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000385 return (PyObject *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000386}
387
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000388static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000389regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000390 PyObject *self;
391 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000392{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000393 PyObject *pat = NULL;
394 PyObject *tran = NULL;
395 if (!PyArg_Parse(args, "S", &pat)) {
396 PyErr_Clear();
397 if (!PyArg_Parse(args, "(SS)", &pat, &tran))
Guido van Rossum36d330b1993-02-21 20:12:16 +0000398 return NULL;
399 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000400 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000401}
402
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000403static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000404symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000405 PyObject *pattern;
406 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000407{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000408 char *opat = PyString_AsString(pattern);
409 char *oend = opat + PyString_Size(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000410 int group_count = 0;
411 int escaped = 0;
412 char *o = opat;
413 char *n;
414 char name_buf[128];
415 char *g;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000416 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000417 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
418
Guido van Rossumab28c561996-06-11 18:33:14 +0000419 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000420 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000421 return pattern;
422 }
423
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000424 npattern = PyString_FromStringAndSize((char*)NULL, PyString_Size(pattern));
Guido van Rossumb6775db1994-08-01 11:34:53 +0000425 if (npattern == NULL)
426 return NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000427 n = PyString_AsString(npattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000428
429 while (o < oend) {
430 if (*o == '(' && escaped == require_escape) {
431 char *backtrack;
432 escaped = 0;
433 ++group_count;
434 *n++ = *o;
435 if (++o >= oend || *o != '<')
436 continue;
437 /* *o == '<' */
438 if (o+1 < oend && *(o+1) == '>')
439 continue;
440 backtrack = o;
441 g = name_buf;
442 for (++o; o < oend;) {
443 if (*o == '>') {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000444 PyObject *group_name = NULL;
445 PyObject *group_index = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000446 *g++ = '\0';
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000447 group_name = PyString_FromString(name_buf);
448 group_index = PyInt_FromLong(group_count);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000449 if (group_name == NULL || group_index == NULL
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000450 || PyDict_SetItem(gdict, group_name, group_index) != 0) {
451 Py_XDECREF(group_name);
452 Py_XDECREF(group_index);
453 Py_XDECREF(npattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000454 return NULL;
455 }
456 ++o; /* eat the '>' */
457 break;
458 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000459 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000460 o = backtrack;
461 break;
462 }
463 *g++ = *o++;
464 }
465 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000466 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000467 *n++ = *o;
468 ++o; /* eat the char following '[' */
469 *n++ = *o;
470 while (o < oend && *o != ']') {
471 ++o;
472 *n++ = *o;
473 }
474 if (o < oend)
475 ++o;
476 }
477 else if (*o == '\\') {
478 escaped = 1;
479 *n++ = *o;
480 ++o;
481 }
482 else {
483 escaped = 0;
484 *n++ = *o;
485 ++o;
486 }
487 }
488
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000489 if (_PyString_Resize(&npattern, n - PyString_AsString(npattern)) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000490 return npattern;
491 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000492 return NULL;
493 }
494
495}
496
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000497static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000498regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000499 PyObject *self;
500 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000501{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000502 PyObject *pattern;
503 PyObject *tran = NULL;
504 PyObject *gdict = NULL;
505 PyObject *npattern;
506 if (!PyArg_Parse(args, "S", &pattern)) {
507 PyErr_Clear();
508 if (!PyArg_Parse(args, "(SS)", &pattern, &tran))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000509 return NULL;
510 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000511 gdict = PyDict_New();
Guido van Rossumb6775db1994-08-01 11:34:53 +0000512 if (gdict == NULL
513 || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000514 Py_DECREF(gdict);
515 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000516 return NULL;
517 }
518 return newregexobject(npattern, tran, pattern, gdict);
519}
520
521
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000522static PyObject *cache_pat;
523static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000524
525static int
526update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000527 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000528{
529 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000530 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000531 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000532 Py_XDECREF(cache_prog);
533 cache_prog = regex_compile((PyObject *)NULL, pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000534 if (cache_prog == NULL)
535 return -1;
536 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000537 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000538 }
539 return 0;
540}
541
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000542static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000543regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000544 PyObject *self;
545 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000546{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000547 PyObject *pat, *string;
548 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000549 return NULL;
550 if (update_cache(pat) < 0)
551 return NULL;
552 return reg_match((regexobject *)cache_prog, string);
553}
554
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000555static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000556regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000557 PyObject *self;
558 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000559{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000560 PyObject *pat, *string;
561 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000562 return NULL;
563 if (update_cache(pat) < 0)
564 return NULL;
565 return reg_search((regexobject *)cache_prog, string);
566}
567
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000568static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000569regex_set_syntax(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000570 PyObject *self, *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000571{
572 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000573 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000574 return NULL;
575 syntax = re_set_syntax(syntax);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000576 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000577}
578
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000579static struct PyMethodDef regex_global_methods[] = {
Guido van Rossum295d1711995-02-19 15:55:19 +0000580 {"compile", regex_compile, 0},
581 {"symcomp", regex_symcomp, 0},
582 {"match", regex_match, 0},
583 {"search", regex_search, 0},
584 {"set_syntax", regex_set_syntax, 0},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000585 {NULL, NULL} /* sentinel */
586};
587
Guido van Rossum8f3032d1996-08-19 22:03:12 +0000588void
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000589initregex()
590{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000591 PyObject *m, *d, *v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000592
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000593 m = Py_InitModule("regex", regex_global_methods);
594 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000595
596 /* Initialize regex.error exception */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000597 RegexError = PyString_FromString("regex.error");
598 if (RegexError == NULL || PyDict_SetItemString(d, "error", RegexError) != 0)
599 Py_FatalError("can't define regex.error");
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000600
601 /* Initialize regex.casefold constant */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000602 v = PyString_FromStringAndSize((char *)NULL, 256);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000603 if (v != NULL) {
604 int i;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000605 char *s = PyString_AsString(v);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000606 for (i = 0; i < 256; i++) {
607 if (isupper(i))
608 s[i] = tolower(i);
609 else
610 s[i] = i;
611 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000612 PyDict_SetItemString(d, "casefold", v);
613 Py_DECREF(v);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000614 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000615}