blob: 981a6e96980c90eee1006b2668bb88d62ae1dc26 [file] [log] [blame]
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00001/*
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00002XXX support range parameter on search
3XXX support mstop parameter on search
4*/
5
6/***********************************************************
Guido van Rossum524b5881995-01-04 19:10:35 +00007Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
8The Netherlands.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00009
10 All Rights Reserved
11
Guido van Rossumd266eb41996-10-25 14:44:06 +000012Permission to use, copy, modify, and distribute this software and its
13documentation for any purpose and without fee is hereby granted,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000014provided that the above copyright notice appear in all copies and that
Guido van Rossumd266eb41996-10-25 14:44:06 +000015both that copyright notice and this permission notice appear in
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000016supporting documentation, and that the names of Stichting Mathematisch
Guido van Rossumd266eb41996-10-25 14:44:06 +000017Centrum or CWI or Corporation for National Research Initiatives or
18CNRI not be used in advertising or publicity pertaining to
19distribution of the software without specific, written prior
20permission.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021
Guido van Rossumd266eb41996-10-25 14:44:06 +000022While CWI is the initial source for this software, a modified version
23is made available by the Corporation for National Research Initiatives
24(CNRI) at the Internet address ftp://ftp.python.org.
25
26STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
27REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
28MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
29CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
30DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
31PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
32TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
33PERFORMANCE OF THIS SOFTWARE.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000034
35******************************************************************/
36
37/* Regular expression objects */
Guido van Rossum1cab95c1992-01-19 16:31:57 +000038/* This uses Tatu Ylonen's copyleft-free reimplementation of
39 GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000040
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042
Guido van Rossuma376cc51996-12-05 23:43:35 +000043#include <ctype.h>
44
Guido van Rossum1cab95c1992-01-19 16:31:57 +000045#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000046
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000047static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048
49typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000050 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000051 struct re_pattern_buffer re_patbuf; /* The compiled expression */
52 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000053 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000054 PyObject *re_translate; /* String object for translate table */
55 PyObject *re_lastok; /* String object last matched/searched */
56 PyObject *re_groupindex; /* Group name to index dictionary */
57 PyObject *re_givenpat; /* Pattern with symbolic groups */
58 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000059} regexobject;
60
61/* Regex object methods */
62
63static void
64reg_dealloc(re)
65 regexobject *re;
66{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000067 PyMem_XDEL(re->re_patbuf.buffer);
68 Py_XDECREF(re->re_translate);
69 Py_XDECREF(re->re_lastok);
70 Py_XDECREF(re->re_groupindex);
71 Py_XDECREF(re->re_givenpat);
72 Py_XDECREF(re->re_realpat);
73 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000074}
75
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000076static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000077makeresult(regs)
78 struct re_registers *regs;
79{
Guido van Rossumc1962021996-10-08 14:18:42 +000080 PyObject *v;
81 int i;
82 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000083
Guido van Rossumc1962021996-10-08 14:18:42 +000084 if (filler == NULL) {
85 filler = Py_BuildValue("(ii)", -1, -1);
86 if (filler == NULL)
87 return NULL;
88 }
89 v = PyTuple_New(RE_NREGS);
90 if (v == NULL)
91 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000092
Guido van Rossumc1962021996-10-08 14:18:42 +000093 for (i = 0; i < RE_NREGS; i++) {
94 int lo = regs->start[i];
95 int hi = regs->end[i];
96 PyObject *w;
97 if (lo == -1 && hi == -1) {
98 w = filler;
99 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000100 }
Guido van Rossumc1962021996-10-08 14:18:42 +0000101 else
102 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +0000103 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
104 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +0000105 return NULL;
106 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000107 }
108 return v;
109}
110
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000111static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000112regobj_match(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000113 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000114 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000115{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000116 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000117 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000118 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000119 int result;
Barry Warsawc3573251996-12-20 21:56:07 +0000120
121 if (!PyArg_ParseTuple(args, "s#|i", &buffer, &size, &offset))
122 return NULL;
123
Guido van Rossum36d330b1993-02-21 20:12:16 +0000124 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000125 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000126 return NULL;
127 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000128 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000129 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000130 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000131 if (result < -1) {
132 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000133 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000134 return NULL;
135 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000136 if (result >= 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000137 PyObject* str = PyString_FromStringAndSize(buffer, size);
138 if (!str)
139 return NULL;
140 re->re_lastok = str;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000141 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000142 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000143}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000144
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000145static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000146regobj_search(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000147 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000148 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000149{
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000150 char *buffer;
151 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000152 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000153 int range;
154 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000155
Barry Warsawc3573251996-12-20 21:56:07 +0000156 if (!PyArg_ParseTuple(args, "s#|i", &buffer, &size, &offset))
157 return NULL;
158
Guido van Rossum36d330b1993-02-21 20:12:16 +0000159 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000160 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000161 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000162 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000163 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
164 the implementation don't match: the documentation states that
165 |range| positions are tried, while the code tries |range|+1
166 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000167 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000168 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000169 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000170 result = re_search(&re->re_patbuf, buffer, size, offset, range,
171 &re->re_regs);
172 if (result < -1) {
173 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000174 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000175 return NULL;
176 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000177 if (result >= 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000178 PyObject* str = PyString_FromStringAndSize(buffer, size);
179 if (!str)
180 return NULL;
181 re->re_lastok = str;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000182 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000183 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000184}
185
Barry Warsawc3573251996-12-20 21:56:07 +0000186/* get the group from the regex where index can be a string (group name) or
187 an integer index [0 .. 99]
188 */
189static PyObject*
190group_from_index(re, index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000191 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000192 PyObject *index;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000193{
194 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000195 char *v;
196
197 if (PyString_Check(index))
198 if (re->re_groupindex == NULL ||
199 !(index = PyDict_GetItem(re->re_groupindex, index)))
200 {
201 PyErr_SetString(RegexError,
202 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000203 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000204 }
Barry Warsawc3573251996-12-20 21:56:07 +0000205
206 i = PyInt_AsLong(index);
207 if (i == -1 && PyErr_Occurred())
208 return NULL;
209
Guido van Rossum36d330b1993-02-21 20:12:16 +0000210 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000211 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000212 return NULL;
213 }
214 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000215 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000216 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000217 return NULL;
218 }
219 a = re->re_regs.start[i];
220 b = re->re_regs.end[i];
221 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000222 Py_INCREF(Py_None);
223 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000224 }
Barry Warsawc3573251996-12-20 21:56:07 +0000225
226 if (!(v = PyString_AsString(re->re_lastok)))
227 return NULL;
228
229 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000230}
231
Barry Warsawc3573251996-12-20 21:56:07 +0000232
233static PyObject *
234regobj_group(re, args)
235 regexobject *re;
236 PyObject *args;
237{
238 int n = PyTuple_Size(args);
239 int i;
240 PyObject *res = NULL;
241
242 if (n < 0)
243 return NULL;
244 if (n == 0) {
245 PyErr_SetString(PyExc_TypeError, "not enough arguments");
246 return NULL;
247 }
248 if (n == 1) {
249 /* return value is a single string */
250 PyObject *index = PyTuple_GetItem(args, 0);
251 if (!index)
252 return NULL;
253
254 return group_from_index(re, index);
255 }
256
257 /* return value is a tuple */
258 if (!(res = PyTuple_New(n)))
259 return NULL;
260
261 for (i = 0; i < n; i++) {
262 PyObject *index = PyTuple_GetItem(args, i);
263 PyObject *group = NULL;
264
265 if (!index)
266 goto finally;
267 if (!(group = group_from_index(re, index)))
268 goto finally;
269 if (PyTuple_SetItem(res, i, group) < 0)
270 goto finally;
271 }
272 return res;
273
274 finally:
275 Py_DECREF(res);
276 return NULL;
277}
278
279
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000280static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000281 {"match", (PyCFunction)regobj_match, 1},
282 {"search", (PyCFunction)regobj_search, 1},
283 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000284 {NULL, NULL} /* sentinel */
285};
286
Barry Warsawc3573251996-12-20 21:56:07 +0000287
288
289static char* members[] = {
290 "last", "regs", "translate",
291 "groupindex", "realpat", "givenpat",
292 NULL
293};
294
295
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000296static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000297regobj_getattr(re, name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000298 regexobject *re;
299 char *name;
300{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000301 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000302 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000303 Py_INCREF(Py_None);
304 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000305 }
306 return makeresult(&re->re_regs);
307 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000308 if (strcmp(name, "last") == 0) {
309 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000310 Py_INCREF(Py_None);
311 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000312 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000313 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000314 return re->re_lastok;
315 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000316 if (strcmp(name, "translate") == 0) {
317 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000318 Py_INCREF(Py_None);
319 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000320 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000321 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000322 return re->re_translate;
323 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000324 if (strcmp(name, "groupindex") == 0) {
325 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000326 Py_INCREF(Py_None);
327 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000328 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000329 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000330 return re->re_groupindex;
331 }
332 if (strcmp(name, "realpat") == 0) {
333 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000334 Py_INCREF(Py_None);
335 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000336 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000337 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000338 return re->re_realpat;
339 }
340 if (strcmp(name, "givenpat") == 0) {
341 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000342 Py_INCREF(Py_None);
343 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000344 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000345 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000346 return re->re_givenpat;
347 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000348 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000349 int i = 0;
350 PyObject *list = NULL;
351
352 /* okay, so it's unlikely this list will change that often.
353 still, it's easier to change it in just one place.
354 */
355 while (members[i])
356 i++;
357 if (!(list = PyList_New(i)))
358 return NULL;
359
360 i = 0;
361 while (members[i]) {
362 PyObject* v = PyString_FromString(members[i]);
363 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000364 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000365 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000366 }
Barry Warsawc3573251996-12-20 21:56:07 +0000367 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000368 }
369 return list;
370 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000371 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000372}
373
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000374static PyTypeObject Regextype = {
375 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000376 0, /*ob_size*/
377 "regex", /*tp_name*/
378 sizeof(regexobject), /*tp_size*/
379 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000380 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000381 (destructor)reg_dealloc, /*tp_dealloc*/
382 0, /*tp_print*/
383 (getattrfunc)regobj_getattr, /*tp_getattr*/
384 0, /*tp_setattr*/
385 0, /*tp_compare*/
386 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000387};
388
Barry Warsawc3573251996-12-20 21:56:07 +0000389/* reference counting invariants:
390 pattern: borrowed
391 translate: borrowed
392 givenpat: borrowed
393 groupindex: transferred
394*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000395static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000396newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000397 PyObject *pattern;
398 PyObject *translate;
399 PyObject *givenpat;
400 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000401{
402 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000403 char *pat;
404 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000405
Barry Warsawc3573251996-12-20 21:56:07 +0000406 if (!PyArg_Parse(pattern, "s#", &pat, &size))
407 return NULL;
408
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000409 if (translate != NULL && PyString_Size(translate) != 256) {
410 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000411 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000412 return NULL;
413 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000414 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000415 if (re != NULL) {
416 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000417 re->re_patbuf.buffer = NULL;
418 re->re_patbuf.allocated = 0;
419 re->re_patbuf.fastmap = re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000420 if (translate) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000421 re->re_patbuf.translate = PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000422 if (!re->re_patbuf.translate)
423 goto finally;
424 Py_INCREF(translate);
425 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000426 else
427 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000428 re->re_translate = translate;
429 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000430 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000431 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000432 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000433 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000434 re->re_givenpat = givenpat;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000435 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000436 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000437 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000438 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000439 }
440 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000441 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000442 finally:
443 Py_DECREF(re);
444 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000445}
446
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000447static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000448regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000449 PyObject *self;
450 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000451{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000452 PyObject *pat = NULL;
453 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000454
455 if (!PyArg_ParseTuple(args, "S|S", &pat, &tran))
456 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000457 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000458}
459
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000460static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000461symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000462 PyObject *pattern;
463 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000464{
Barry Warsawc3573251996-12-20 21:56:07 +0000465 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000466 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000467 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000468 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000469 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000470 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000471 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
472
Barry Warsawc3573251996-12-20 21:56:07 +0000473 if (!(opat = PyString_AsString(pattern)))
474 return NULL;
475
476 if ((sz = PyString_Size(pattern)) < 0)
477 return NULL;
478
479 oend = opat + sz;
480 o = opat;
481
Guido van Rossumab28c561996-06-11 18:33:14 +0000482 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000483 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000484 return pattern;
485 }
486
Barry Warsawc3573251996-12-20 21:56:07 +0000487 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
488 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000489 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000490
491 while (o < oend) {
492 if (*o == '(' && escaped == require_escape) {
493 char *backtrack;
494 escaped = 0;
495 ++group_count;
496 *n++ = *o;
497 if (++o >= oend || *o != '<')
498 continue;
499 /* *o == '<' */
500 if (o+1 < oend && *(o+1) == '>')
501 continue;
502 backtrack = o;
503 g = name_buf;
504 for (++o; o < oend;) {
505 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000506 PyObject *group_name = NULL;
507 PyObject *group_index = NULL;
508 *g++ = '\0';
509 group_name = PyString_FromString(name_buf);
510 group_index = PyInt_FromLong(group_count);
511 if (group_name == NULL ||
512 group_index == NULL ||
513 PyDict_SetItem(gdict, group_name,
514 group_index) != 0)
515 {
516 Py_XDECREF(group_name);
517 Py_XDECREF(group_index);
518 Py_XDECREF(npattern);
519 return NULL;
520 }
521 ++o; /* eat the '>' */
522 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000523 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000524 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000525 o = backtrack;
526 break;
527 }
528 *g++ = *o++;
529 }
530 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000531 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000532 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000533 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000534 *n++ = *o;
535 while (o < oend && *o != ']') {
536 ++o;
537 *n++ = *o;
538 }
539 if (o < oend)
540 ++o;
541 }
542 else if (*o == '\\') {
543 escaped = 1;
544 *n++ = *o;
545 ++o;
546 }
547 else {
548 escaped = 0;
549 *n++ = *o;
550 ++o;
551 }
552 }
553
Barry Warsawc3573251996-12-20 21:56:07 +0000554 if (!(v = PyString_AsString(npattern))) {
555 Py_DECREF(npattern);
556 return NULL;
557 }
558 /* _PyString_Resize() decrements npattern on failure */
559 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000560 return npattern;
561 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000562 return NULL;
563 }
564
565}
566
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000567static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000568regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000569 PyObject *self;
570 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000571{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000572 PyObject *pattern;
573 PyObject *tran = NULL;
574 PyObject *gdict = NULL;
575 PyObject *npattern;
Barry Warsawc3573251996-12-20 21:56:07 +0000576
577 if (!PyArg_ParseTuple(args, "S|S", &pattern, &tran))
578 return NULL;
579
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000580 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000581 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000582 Py_DECREF(gdict);
583 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000584 return NULL;
585 }
586 return newregexobject(npattern, tran, pattern, gdict);
587}
588
589
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000590static PyObject *cache_pat;
591static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000592
593static int
594update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000595 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000596{
Barry Warsawc3573251996-12-20 21:56:07 +0000597 PyObject *tuple = Py_BuildValue("(O)", pat);
598 int status = 0;
599
600 if (!tuple)
601 return -1;
602
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000603 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000604 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000605 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000606 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000607 cache_prog = regex_compile((PyObject *)NULL, tuple);
608 if (cache_prog == NULL) {
609 status = -1;
610 goto finally;
611 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000612 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000613 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000614 }
Barry Warsawc3573251996-12-20 21:56:07 +0000615 finally:
616 Py_DECREF(tuple);
617 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000618}
619
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000620static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000621regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000622 PyObject *self;
623 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000624{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000625 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000626 PyObject *tuple, *v;
627
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000628 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000629 return NULL;
630 if (update_cache(pat) < 0)
631 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000632
633 if (!(tuple = Py_BuildValue("(S)", string)))
634 return NULL;
635 v = regobj_match((regexobject *)cache_prog, tuple);
636 Py_DECREF(tuple);
637 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000638}
639
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000640static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000641regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000642 PyObject *self;
643 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000644{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000645 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000646 PyObject *tuple, *v;
647
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000648 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000649 return NULL;
650 if (update_cache(pat) < 0)
651 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000652
653 if (!(tuple = Py_BuildValue("(S)", string)))
654 return NULL;
655 v = regobj_search((regexobject *)cache_prog, tuple);
656 Py_DECREF(tuple);
657 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000658}
659
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000660static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000661regex_set_syntax(self, args)
Barry Warsawc3573251996-12-20 21:56:07 +0000662 PyObject *self;
663 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000664{
665 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000666 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000667 return NULL;
668 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000669 /* wipe the global pattern cache */
670 Py_XDECREF(cache_pat);
671 cache_pat = NULL;
672 Py_XDECREF(cache_prog);
673 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000674 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000675}
676
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000677static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000678 {"compile", regex_compile, 1},
679 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000680 {"match", regex_match, 0},
681 {"search", regex_search, 0},
682 {"set_syntax", regex_set_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000683 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000684};
685
Guido van Rossum8f3032d1996-08-19 22:03:12 +0000686void
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000687initregex()
688{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000689 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000690 int i;
691 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000692
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000693 m = Py_InitModule("regex", regex_global_methods);
694 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000695
696 /* Initialize regex.error exception */
Barry Warsawc3573251996-12-20 21:56:07 +0000697 v = RegexError = PyString_FromString("regex.error");
698 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
699 goto finally;
700
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000701 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000702 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
703 goto finally;
704
705 if (!(s = PyString_AsString(v)))
706 goto finally;
707
708 for (i = 0; i < 256; i++) {
709 if (isupper(i))
710 s[i] = tolower(i);
711 else
712 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000713 }
Barry Warsawc3573251996-12-20 21:56:07 +0000714 if (PyDict_SetItemString(d, "casefold", v) < 0)
715 goto finally;
716 Py_DECREF(v);
717
718 if (!PyErr_Occurred())
719 return;
720 finally:
721 Py_FatalError("can't initialize regex module");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000722}