blob: 41c7315aa4e3eaf42df805392068dfe979c1a31b [file] [log] [blame]
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00001/*
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00002XXX support range parameter on search
3XXX support mstop parameter on search
4*/
5
6/***********************************************************
Guido van Rossum524b5881995-01-04 19:10:35 +00007Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
8The Netherlands.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00009
10 All Rights Reserved
11
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Permission to use, copy, modify, and distribute this software and its
13documentation for any purpose and without fee is hereby granted,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000014provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000015both that copyright notice and this permission notice appear in
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000016supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000017Centrum or CWI or Corporation for National Research Initiatives or
18CNRI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior
20permission.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021
Guido van Rossumd266eb41996-10-25 14:44:06 +000022While CWI is the initial source for this software, a modified version
23is made available by the Corporation for National Research Initiatives
24(CNRI) at the Internet address ftp://ftp.python.org.
25
26STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
27REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
28MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
29CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
30DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
31PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
32TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
33PERFORMANCE OF THIS SOFTWARE.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35******************************************************************/
36
37/* Regular expression objects */
Guido van Rossum1cab95c1992-01-19 16:31:57 +000038/* This uses Tatu Ylonen's copyleft-free reimplementation of
39 GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000040
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042
Guido van Rossuma376cc51996-12-05 23:43:35 +000043#include <ctype.h>
44
Guido van Rossum1cab95c1992-01-19 16:31:57 +000045#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000046
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000047static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048
49typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000050 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000051 struct re_pattern_buffer re_patbuf; /* The compiled expression */
52 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000053 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000054 PyObject *re_translate; /* String object for translate table */
55 PyObject *re_lastok; /* String object last matched/searched */
56 PyObject *re_groupindex; /* Group name to index dictionary */
57 PyObject *re_givenpat; /* Pattern with symbolic groups */
58 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000059} regexobject;
60
61/* Regex object methods */
62
63static void
64reg_dealloc(re)
65 regexobject *re;
66{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000067 PyMem_XDEL(re->re_patbuf.buffer);
68 Py_XDECREF(re->re_translate);
69 Py_XDECREF(re->re_lastok);
70 Py_XDECREF(re->re_groupindex);
71 Py_XDECREF(re->re_givenpat);
72 Py_XDECREF(re->re_realpat);
73 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000074}
75
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000076static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000077makeresult(regs)
78 struct re_registers *regs;
79{
Guido van Rossumc1962021996-10-08 14:18:42 +000080 PyObject *v;
81 int i;
82 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000083
Guido van Rossumc1962021996-10-08 14:18:42 +000084 if (filler == NULL) {
85 filler = Py_BuildValue("(ii)", -1, -1);
86 if (filler == NULL)
87 return NULL;
88 }
89 v = PyTuple_New(RE_NREGS);
90 if (v == NULL)
91 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000092
Guido van Rossumc1962021996-10-08 14:18:42 +000093 for (i = 0; i < RE_NREGS; i++) {
94 int lo = regs->start[i];
95 int hi = regs->end[i];
96 PyObject *w;
97 if (lo == -1 && hi == -1) {
98 w = filler;
99 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000100 }
Guido van Rossumc1962021996-10-08 14:18:42 +0000101 else
102 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +0000103 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
104 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +0000105 return NULL;
106 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000107 }
108 return v;
109}
110
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000111static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000112regobj_match(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000113 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000114 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000115{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000116 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000117 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000118 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000119 int result;
Barry Warsawc3573251996-12-20 21:56:07 +0000120
121 if (!PyArg_ParseTuple(args, "s#|i", &buffer, &size, &offset))
122 return NULL;
123
Guido van Rossum36d330b1993-02-21 20:12:16 +0000124 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000125 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000126 return NULL;
127 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000128 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000129 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000130 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000131 if (result < -1) {
132 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000133 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000134 return NULL;
135 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000136 if (result >= 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000137 PyObject* str = PyString_FromStringAndSize(buffer, size);
138 if (!str)
139 return NULL;
140 re->re_lastok = str;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000141 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000142 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000143}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000144
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000145static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000146regobj_search(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000147 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000148 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000149{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000150 char *buffer;
151 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000152 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000153 int range;
154 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000155
Barry Warsawc3573251996-12-20 21:56:07 +0000156 if (!PyArg_ParseTuple(args, "s#|i", &buffer, &size, &offset))
157 return NULL;
158
Guido van Rossum36d330b1993-02-21 20:12:16 +0000159 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000160 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000161 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000162 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000163 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
164 the implementation don't match: the documentation states that
165 |range| positions are tried, while the code tries |range|+1
166 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000167 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000168 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000169 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000170 result = re_search(&re->re_patbuf, buffer, size, offset, range,
171 &re->re_regs);
172 if (result < -1) {
173 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000174 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000175 return NULL;
176 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000177 if (result >= 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000178 PyObject* str = PyString_FromStringAndSize(buffer, size);
179 if (!str)
180 return NULL;
181 re->re_lastok = str;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000182 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000183 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000184}
185
Barry Warsawc3573251996-12-20 21:56:07 +0000186/* get the group from the regex where index can be a string (group name) or
187 an integer index [0 .. 99]
188 */
189static PyObject*
190group_from_index(re, index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000191 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000192 PyObject *index;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000193{
194 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000195 char *v;
196
197 if (PyString_Check(index))
198 if (re->re_groupindex == NULL ||
199 !(index = PyDict_GetItem(re->re_groupindex, index)))
200 {
201 PyErr_SetString(RegexError,
202 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000203 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000204 }
Barry Warsawc3573251996-12-20 21:56:07 +0000205
206 i = PyInt_AsLong(index);
207 if (i == -1 && PyErr_Occurred())
208 return NULL;
209
Guido van Rossum36d330b1993-02-21 20:12:16 +0000210 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000211 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000212 return NULL;
213 }
214 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000215 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000216 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000217 return NULL;
218 }
219 a = re->re_regs.start[i];
220 b = re->re_regs.end[i];
221 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000222 Py_INCREF(Py_None);
223 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000224 }
Barry Warsawc3573251996-12-20 21:56:07 +0000225
226 if (!(v = PyString_AsString(re->re_lastok)))
227 return NULL;
228
229 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000230}
231
Barry Warsawc3573251996-12-20 21:56:07 +0000232
233static PyObject *
234regobj_group(re, args)
235 regexobject *re;
236 PyObject *args;
237{
238 int n = PyTuple_Size(args);
239 int i;
240 PyObject *res = NULL;
241
242 if (n < 0)
243 return NULL;
244 if (n == 0) {
245 PyErr_SetString(PyExc_TypeError, "not enough arguments");
246 return NULL;
247 }
248 if (n == 1) {
249 /* return value is a single string */
250 PyObject *index = PyTuple_GetItem(args, 0);
251 if (!index)
252 return NULL;
253
254 return group_from_index(re, index);
255 }
256
257 /* return value is a tuple */
258 if (!(res = PyTuple_New(n)))
259 return NULL;
260
261 for (i = 0; i < n; i++) {
262 PyObject *index = PyTuple_GetItem(args, i);
263 PyObject *group = NULL;
264
265 if (!index)
266 goto finally;
267 if (!(group = group_from_index(re, index)))
268 goto finally;
269 if (PyTuple_SetItem(res, i, group) < 0)
270 goto finally;
271 }
272 return res;
273
274 finally:
275 Py_DECREF(res);
276 return NULL;
277}
278
279
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000280static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000281 {"match", (PyCFunction)regobj_match, 1},
282 {"search", (PyCFunction)regobj_search, 1},
283 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000284 {NULL, NULL} /* sentinel */
285};
286
Barry Warsawc3573251996-12-20 21:56:07 +0000287
288
289static char* members[] = {
290 "last", "regs", "translate",
291 "groupindex", "realpat", "givenpat",
292 NULL
293};
294
295
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000296static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000297regobj_getattr(re, name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000298 regexobject *re;
299 char *name;
300{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000301 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000302 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000303 Py_INCREF(Py_None);
304 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000305 }
306 return makeresult(&re->re_regs);
307 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000308 if (strcmp(name, "last") == 0) {
309 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000310 Py_INCREF(Py_None);
311 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000312 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000313 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000314 return re->re_lastok;
315 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000316 if (strcmp(name, "translate") == 0) {
317 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000318 Py_INCREF(Py_None);
319 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000320 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000321 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000322 return re->re_translate;
323 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000324 if (strcmp(name, "groupindex") == 0) {
325 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000326 Py_INCREF(Py_None);
327 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000328 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000329 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000330 return re->re_groupindex;
331 }
332 if (strcmp(name, "realpat") == 0) {
333 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000334 Py_INCREF(Py_None);
335 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000336 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000337 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000338 return re->re_realpat;
339 }
340 if (strcmp(name, "givenpat") == 0) {
341 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000342 Py_INCREF(Py_None);
343 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000344 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000345 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000346 return re->re_givenpat;
347 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000348 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000349 int i = 0;
350 PyObject *list = NULL;
351
352 /* okay, so it's unlikely this list will change that often.
353 still, it's easier to change it in just one place.
354 */
355 while (members[i])
356 i++;
357 if (!(list = PyList_New(i)))
358 return NULL;
359
360 i = 0;
361 while (members[i]) {
362 PyObject* v = PyString_FromString(members[i]);
363 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000364 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000365 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000366 }
Barry Warsawc3573251996-12-20 21:56:07 +0000367 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000368 }
369 return list;
370 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000371 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000372}
373
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000374static PyTypeObject Regextype = {
375 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000376 0, /*ob_size*/
377 "regex", /*tp_name*/
378 sizeof(regexobject), /*tp_size*/
379 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000380 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000381 (destructor)reg_dealloc, /*tp_dealloc*/
382 0, /*tp_print*/
383 (getattrfunc)regobj_getattr, /*tp_getattr*/
384 0, /*tp_setattr*/
385 0, /*tp_compare*/
386 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000387};
388
Barry Warsawc3573251996-12-20 21:56:07 +0000389/* reference counting invariants:
390 pattern: borrowed
391 translate: borrowed
392 givenpat: borrowed
393 groupindex: transferred
394*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000395static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000396newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000397 PyObject *pattern;
398 PyObject *translate;
399 PyObject *givenpat;
400 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000401{
402 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000403 char *pat;
404 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000405
Barry Warsawc3573251996-12-20 21:56:07 +0000406 if (!PyArg_Parse(pattern, "s#", &pat, &size))
407 return NULL;
408
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000409 if (translate != NULL && PyString_Size(translate) != 256) {
410 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000411 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000412 return NULL;
413 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000414 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000415 if (re != NULL) {
416 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000417 re->re_patbuf.buffer = NULL;
418 re->re_patbuf.allocated = 0;
419 re->re_patbuf.fastmap = re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000420 if (translate) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000421 re->re_patbuf.translate = PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000422 if (!re->re_patbuf.translate)
423 goto finally;
424 Py_INCREF(translate);
425 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000426 else
427 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000428 re->re_translate = translate;
429 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000430 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000431 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000432 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000433 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000434 re->re_givenpat = givenpat;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000435 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000436 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000437 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000438 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000439 }
440 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000441 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000442 finally:
443 Py_DECREF(re);
444 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000445}
446
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000447static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000448regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000449 PyObject *self;
450 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000451{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000452 PyObject *pat = NULL;
453 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000454
455 if (!PyArg_ParseTuple(args, "S|S", &pat, &tran))
456 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000457 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000458}
459
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000460static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000461symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000462 PyObject *pattern;
463 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000464{
Barry Warsawc3573251996-12-20 21:56:07 +0000465 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000466 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000467 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000468 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000469 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000470 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000471 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
472
Barry Warsawc3573251996-12-20 21:56:07 +0000473 if (!(opat = PyString_AsString(pattern)))
474 return NULL;
475
476 if ((sz = PyString_Size(pattern)) < 0)
477 return NULL;
478
479 oend = opat + sz;
480 o = opat;
481
Guido van Rossumab28c561996-06-11 18:33:14 +0000482 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000483 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000484 return pattern;
485 }
486
Barry Warsawc3573251996-12-20 21:56:07 +0000487 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
488 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000489 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000490
491 while (o < oend) {
492 if (*o == '(' && escaped == require_escape) {
493 char *backtrack;
494 escaped = 0;
495 ++group_count;
496 *n++ = *o;
497 if (++o >= oend || *o != '<')
498 continue;
499 /* *o == '<' */
500 if (o+1 < oend && *(o+1) == '>')
501 continue;
502 backtrack = o;
503 g = name_buf;
504 for (++o; o < oend;) {
505 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000506 PyObject *group_name = NULL;
507 PyObject *group_index = NULL;
508 *g++ = '\0';
509 group_name = PyString_FromString(name_buf);
510 group_index = PyInt_FromLong(group_count);
511 if (group_name == NULL ||
512 group_index == NULL ||
513 PyDict_SetItem(gdict, group_name,
514 group_index) != 0)
515 {
516 Py_XDECREF(group_name);
517 Py_XDECREF(group_index);
518 Py_XDECREF(npattern);
519 return NULL;
520 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000521 Py_DECREF(group_name);
522 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000523 ++o; /* eat the '>' */
524 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000525 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000526 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000527 o = backtrack;
528 break;
529 }
530 *g++ = *o++;
531 }
532 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000533 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000534 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000535 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000536 *n++ = *o;
537 while (o < oend && *o != ']') {
538 ++o;
539 *n++ = *o;
540 }
541 if (o < oend)
542 ++o;
543 }
544 else if (*o == '\\') {
545 escaped = 1;
546 *n++ = *o;
547 ++o;
548 }
549 else {
550 escaped = 0;
551 *n++ = *o;
552 ++o;
553 }
554 }
555
Barry Warsawc3573251996-12-20 21:56:07 +0000556 if (!(v = PyString_AsString(npattern))) {
557 Py_DECREF(npattern);
558 return NULL;
559 }
560 /* _PyString_Resize() decrements npattern on failure */
561 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000562 return npattern;
563 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000564 return NULL;
565 }
566
567}
568
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000569static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000570regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000571 PyObject *self;
572 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000573{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000574 PyObject *pattern;
575 PyObject *tran = NULL;
576 PyObject *gdict = NULL;
577 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000578 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000579
580 if (!PyArg_ParseTuple(args, "S|S", &pattern, &tran))
581 return NULL;
582
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000583 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000584 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000585 Py_DECREF(gdict);
586 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000587 return NULL;
588 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000589 retval = newregexobject(npattern, tran, pattern, gdict);
590 Py_DECREF(npattern);
591 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000592}
593
594
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000595static PyObject *cache_pat;
596static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000597
598static int
599update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000600 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000601{
Barry Warsawc3573251996-12-20 21:56:07 +0000602 PyObject *tuple = Py_BuildValue("(O)", pat);
603 int status = 0;
604
605 if (!tuple)
606 return -1;
607
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000608 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000609 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000610 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000611 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000612 cache_prog = regex_compile((PyObject *)NULL, tuple);
613 if (cache_prog == NULL) {
614 status = -1;
615 goto finally;
616 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000617 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000618 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000619 }
Barry Warsawc3573251996-12-20 21:56:07 +0000620 finally:
621 Py_DECREF(tuple);
622 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000623}
624
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000625static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000626regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000627 PyObject *self;
628 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000629{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000630 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000631 PyObject *tuple, *v;
632
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000633 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000634 return NULL;
635 if (update_cache(pat) < 0)
636 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000637
638 if (!(tuple = Py_BuildValue("(S)", string)))
639 return NULL;
640 v = regobj_match((regexobject *)cache_prog, tuple);
641 Py_DECREF(tuple);
642 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000643}
644
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000645static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000646regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000647 PyObject *self;
648 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000649{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000650 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000651 PyObject *tuple, *v;
652
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000653 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000654 return NULL;
655 if (update_cache(pat) < 0)
656 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000657
658 if (!(tuple = Py_BuildValue("(S)", string)))
659 return NULL;
660 v = regobj_search((regexobject *)cache_prog, tuple);
661 Py_DECREF(tuple);
662 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000663}
664
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000665static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000666regex_set_syntax(self, args)
Barry Warsawc3573251996-12-20 21:56:07 +0000667 PyObject *self;
668 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000669{
670 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000671 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000672 return NULL;
673 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000674 /* wipe the global pattern cache */
675 Py_XDECREF(cache_pat);
676 cache_pat = NULL;
677 Py_XDECREF(cache_prog);
678 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000679 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000680}
681
Barry Warsaw909d7c31997-02-18 18:48:50 +0000682static PyObject *
683regex_get_syntax(self, args)
684 PyObject *self;
685 PyObject *args;
686{
687 if (!PyArg_Parse(args, ""))
688 return NULL;
689 return PyInt_FromLong((long)re_syntax);
690}
691
692
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000693static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000694 {"compile", regex_compile, 1},
695 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000696 {"match", regex_match, 0},
697 {"search", regex_search, 0},
698 {"set_syntax", regex_set_syntax, 0},
Barry Warsaw909d7c31997-02-18 18:48:50 +0000699 {"get_syntax", regex_get_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000700 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000701};
702
Guido van Rossum8f3032d1996-08-19 22:03:12 +0000703void
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000704initregex()
705{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000706 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000707 int i;
708 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000709
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000710 m = Py_InitModule("regex", regex_global_methods);
711 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000712
713 /* Initialize regex.error exception */
Barry Warsawc3573251996-12-20 21:56:07 +0000714 v = RegexError = PyString_FromString("regex.error");
715 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
716 goto finally;
717
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000718 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000719 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
720 goto finally;
721
722 if (!(s = PyString_AsString(v)))
723 goto finally;
724
725 for (i = 0; i < 256; i++) {
726 if (isupper(i))
727 s[i] = tolower(i);
728 else
729 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000730 }
Barry Warsawc3573251996-12-20 21:56:07 +0000731 if (PyDict_SetItemString(d, "casefold", v) < 0)
732 goto finally;
733 Py_DECREF(v);
734
735 if (!PyErr_Occurred())
736 return;
737 finally:
738 Py_FatalError("can't initialize regex module");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000739}