blob: c9f9781fe93086fb0a9053635b37a9325029559a [file] [log] [blame]
/*
XXX support range parameter on search
XXX support mstop parameter on search
*/
5
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
36
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
   GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000040
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000041#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000042
Guido van Rossuma376cc51996-12-05 23:43:35 +000043#include <ctype.h>
44
Guido van Rossum1cab95c1992-01-19 16:31:57 +000045#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000046
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000047static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000048
49typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000050 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000051 struct re_pattern_buffer re_patbuf; /* The compiled expression */
52 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000053 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000054 PyObject *re_translate; /* String object for translate table */
55 PyObject *re_lastok; /* String object last matched/searched */
56 PyObject *re_groupindex; /* Group name to index dictionary */
57 PyObject *re_givenpat; /* Pattern with symbolic groups */
58 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000059} regexobject;
60
61/* Regex object methods */
62
63static void
64reg_dealloc(re)
65 regexobject *re;
66{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000067 PyMem_XDEL(re->re_patbuf.buffer);
68 Py_XDECREF(re->re_translate);
69 Py_XDECREF(re->re_lastok);
70 Py_XDECREF(re->re_groupindex);
71 Py_XDECREF(re->re_givenpat);
72 Py_XDECREF(re->re_realpat);
73 PyMem_DEL(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000074}
75
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000076static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000077makeresult(regs)
78 struct re_registers *regs;
79{
Guido van Rossumc1962021996-10-08 14:18:42 +000080 PyObject *v;
81 int i;
82 static PyObject *filler = NULL;
83 if (filler == NULL) {
84 filler = Py_BuildValue("(ii)", -1, -1);
85 if (filler == NULL)
86 return NULL;
87 }
88 v = PyTuple_New(RE_NREGS);
89 if (v == NULL)
90 return NULL;
91 for (i = 0; i < RE_NREGS; i++) {
92 int lo = regs->start[i];
93 int hi = regs->end[i];
94 PyObject *w;
95 if (lo == -1 && hi == -1) {
96 w = filler;
97 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000098 }
Guido van Rossumc1962021996-10-08 14:18:42 +000099 else
100 w = Py_BuildValue("(ii)", lo, hi);
101 if (w == NULL) {
102 Py_XDECREF(v);
103 return NULL;
104 }
105 PyTuple_SetItem(v, i, w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000106 }
107 return v;
108}
109
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000110static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000111reg_match(re, args)
112 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000113 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000114{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000115 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000116 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000117 int size;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000118 int offset;
119 int result;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000120 if (PyArg_Parse(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000121 offset = 0;
122 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000123 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000124 PyErr_Clear();
125 if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000126 return NULL;
127 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000128 buffer = PyString_AsString(argstring);
129 size = PyString_Size(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000130 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000131 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000132 return NULL;
133 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000134 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000135 re->re_lastok = NULL;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000136 result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000137 if (result < -1) {
138 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000139 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000140 return NULL;
141 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000142 if (result >= 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000143 Py_INCREF(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000144 re->re_lastok = argstring;
145 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000146 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000147}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000148
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000149static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000150reg_search(re, args)
151 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000152 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000153{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000154 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000155 char *buffer;
156 int size;
157 int offset;
158 int range;
159 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000160
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000161 if (PyArg_Parse(args, "S", &argstring)) {
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000162 offset = 0;
163 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000164 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000165 PyErr_Clear();
166 if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000167 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000168 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000169 buffer = PyString_AsString(argstring);
170 size = PyString_Size(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000171 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000172 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000173 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000174 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000175 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
176 the implementation don't match: the documentation states that
177 |range| positions are tried, while the code tries |range|+1
178 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000179 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000180 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000181 re->re_lastok = NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000182 result = re_search(&re->re_patbuf, buffer, size, offset, range,
183 &re->re_regs);
184 if (result < -1) {
185 /* Failure like stack overflow */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000186 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000187 return NULL;
188 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000189 if (result >= 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000190 Py_INCREF(argstring);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000191 re->re_lastok = argstring;
192 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000193 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000194}
195
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000196static PyObject *
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000197reg_group(re, args)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000198 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000199 PyObject *args;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000200{
201 int i, a, b;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000202 if (args != NULL && PyTuple_Check(args)) {
203 int n = PyTuple_Size(args);
204 PyObject *res = PyTuple_New(n);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000205 if (res == NULL)
206 return NULL;
207 for (i = 0; i < n; i++) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000208 PyObject *v = reg_group(re, PyTuple_GetItem(args, i));
Guido van Rossum36d330b1993-02-21 20:12:16 +0000209 if (v == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000210 Py_DECREF(res);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000211 return NULL;
212 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000213 PyTuple_SetItem(res, i, v);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000214 }
215 return res;
216 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000217 if (!PyArg_Parse(args, "i", &i)) {
218 PyObject *n;
219 PyErr_Clear();
220 if (!PyArg_Parse(args, "S", &n))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000221 return NULL;
222 else {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000223 PyObject *index;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000224 if (re->re_groupindex == NULL)
225 index = NULL;
226 else
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000227 index = PyDict_GetItem(re->re_groupindex, n);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000228 if (index == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000229 PyErr_SetString(RegexError, "group() group name doesn't exist");
Guido van Rossumb6775db1994-08-01 11:34:53 +0000230 return NULL;
231 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000232 i = PyInt_AsLong(index);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000233 }
234 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000235 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000236 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000237 return NULL;
238 }
239 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000240 PyErr_SetString(RegexError,
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000241 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000242 return NULL;
243 }
244 a = re->re_regs.start[i];
245 b = re->re_regs.end[i];
246 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000247 Py_INCREF(Py_None);
248 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000249 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000250 return PyString_FromStringAndSize(PyString_AsString(re->re_lastok)+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000251}
252
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000253static struct PyMethodDef reg_methods[] = {
254 {"match", (PyCFunction)reg_match},
255 {"search", (PyCFunction)reg_search},
256 {"group", (PyCFunction)reg_group},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000257 {NULL, NULL} /* sentinel */
258};
259
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000260static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000261reg_getattr(re, name)
262 regexobject *re;
263 char *name;
264{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000265 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000266 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000267 Py_INCREF(Py_None);
268 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000269 }
270 return makeresult(&re->re_regs);
271 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000272 if (strcmp(name, "last") == 0) {
273 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000274 Py_INCREF(Py_None);
275 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000276 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000277 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000278 return re->re_lastok;
279 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000280 if (strcmp(name, "translate") == 0) {
281 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000282 Py_INCREF(Py_None);
283 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000284 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000285 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000286 return re->re_translate;
287 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000288 if (strcmp(name, "groupindex") == 0) {
289 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000290 Py_INCREF(Py_None);
291 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000292 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000293 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000294 return re->re_groupindex;
295 }
296 if (strcmp(name, "realpat") == 0) {
297 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000298 Py_INCREF(Py_None);
299 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000300 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000301 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000302 return re->re_realpat;
303 }
304 if (strcmp(name, "givenpat") == 0) {
305 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000306 Py_INCREF(Py_None);
307 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000308 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000309 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000310 return re->re_givenpat;
311 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000312 if (strcmp(name, "__members__") == 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000313 PyObject *list = PyList_New(6);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000314 if (list) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000315 PyList_SetItem(list, 0, PyString_FromString("last"));
316 PyList_SetItem(list, 1, PyString_FromString("regs"));
317 PyList_SetItem(list, 2, PyString_FromString("translate"));
318 PyList_SetItem(list, 3, PyString_FromString("groupindex"));
319 PyList_SetItem(list, 4, PyString_FromString("realpat"));
320 PyList_SetItem(list, 5, PyString_FromString("givenpat"));
321 if (PyErr_Occurred()) {
322 Py_DECREF(list);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000323 list = NULL;
324 }
325 }
326 return list;
327 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000328 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000329}
330
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000331static PyTypeObject Regextype = {
332 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000333 0, /*ob_size*/
334 "regex", /*tp_name*/
335 sizeof(regexobject), /*tp_size*/
336 0, /*tp_itemsize*/
337 /* methods */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000338 (destructor)reg_dealloc, /*tp_dealloc*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000339 0, /*tp_print*/
Guido van Rossumb6775db1994-08-01 11:34:53 +0000340 (getattrfunc)reg_getattr, /*tp_getattr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000341 0, /*tp_setattr*/
342 0, /*tp_compare*/
343 0, /*tp_repr*/
344};
345
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000346static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000347newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000348 PyObject *pattern;
349 PyObject *translate;
350 PyObject *givenpat;
351 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000352{
353 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000354 char *pat = PyString_AsString(pattern);
355 int size = PyString_Size(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000356
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000357 if (translate != NULL && PyString_Size(translate) != 256) {
358 PyErr_SetString(RegexError,
Guido van Rossum36d330b1993-02-21 20:12:16 +0000359 "translation table must be 256 bytes");
360 return NULL;
361 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000362 re = PyObject_NEW(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000363 if (re != NULL) {
364 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000365 re->re_patbuf.buffer = NULL;
366 re->re_patbuf.allocated = 0;
367 re->re_patbuf.fastmap = re->re_fastmap;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000368 if (translate)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000369 re->re_patbuf.translate = PyString_AsString(translate);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000370 else
371 re->re_patbuf.translate = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000372 Py_XINCREF(translate);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000373 re->re_translate = translate;
374 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000375 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000376 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000377 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000378 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000379 re->re_givenpat = givenpat;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000380 error = re_compile_pattern(pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000381 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000382 PyErr_SetString(RegexError, error);
383 Py_DECREF(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000384 re = NULL;
385 }
386 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000387 return (PyObject *)re;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000388}
389
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000390static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000391regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000392 PyObject *self;
393 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000394{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000395 PyObject *pat = NULL;
396 PyObject *tran = NULL;
397 if (!PyArg_Parse(args, "S", &pat)) {
398 PyErr_Clear();
399 if (!PyArg_Parse(args, "(SS)", &pat, &tran))
Guido van Rossum36d330b1993-02-21 20:12:16 +0000400 return NULL;
401 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000402 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000403}
404
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000405static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000406symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000407 PyObject *pattern;
408 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000409{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000410 char *opat = PyString_AsString(pattern);
411 char *oend = opat + PyString_Size(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000412 int group_count = 0;
413 int escaped = 0;
414 char *o = opat;
415 char *n;
416 char name_buf[128];
417 char *g;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000418 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000419 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
420
Guido van Rossumab28c561996-06-11 18:33:14 +0000421 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000422 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000423 return pattern;
424 }
425
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000426 npattern = PyString_FromStringAndSize((char*)NULL, PyString_Size(pattern));
Guido van Rossumb6775db1994-08-01 11:34:53 +0000427 if (npattern == NULL)
428 return NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000429 n = PyString_AsString(npattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000430
431 while (o < oend) {
432 if (*o == '(' && escaped == require_escape) {
433 char *backtrack;
434 escaped = 0;
435 ++group_count;
436 *n++ = *o;
437 if (++o >= oend || *o != '<')
438 continue;
439 /* *o == '<' */
440 if (o+1 < oend && *(o+1) == '>')
441 continue;
442 backtrack = o;
443 g = name_buf;
444 for (++o; o < oend;) {
445 if (*o == '>') {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000446 PyObject *group_name = NULL;
447 PyObject *group_index = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000448 *g++ = '\0';
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000449 group_name = PyString_FromString(name_buf);
450 group_index = PyInt_FromLong(group_count);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000451 if (group_name == NULL || group_index == NULL
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000452 || PyDict_SetItem(gdict, group_name, group_index) != 0) {
453 Py_XDECREF(group_name);
454 Py_XDECREF(group_index);
455 Py_XDECREF(npattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000456 return NULL;
457 }
458 ++o; /* eat the '>' */
459 break;
460 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000461 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000462 o = backtrack;
463 break;
464 }
465 *g++ = *o++;
466 }
467 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000468 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000469 *n++ = *o;
470 ++o; /* eat the char following '[' */
471 *n++ = *o;
472 while (o < oend && *o != ']') {
473 ++o;
474 *n++ = *o;
475 }
476 if (o < oend)
477 ++o;
478 }
479 else if (*o == '\\') {
480 escaped = 1;
481 *n++ = *o;
482 ++o;
483 }
484 else {
485 escaped = 0;
486 *n++ = *o;
487 ++o;
488 }
489 }
490
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000491 if (_PyString_Resize(&npattern, n - PyString_AsString(npattern)) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000492 return npattern;
493 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000494 return NULL;
495 }
496
497}
498
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000499static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000500regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000501 PyObject *self;
502 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000503{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000504 PyObject *pattern;
505 PyObject *tran = NULL;
506 PyObject *gdict = NULL;
507 PyObject *npattern;
508 if (!PyArg_Parse(args, "S", &pattern)) {
509 PyErr_Clear();
510 if (!PyArg_Parse(args, "(SS)", &pattern, &tran))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000511 return NULL;
512 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000513 gdict = PyDict_New();
Guido van Rossumb6775db1994-08-01 11:34:53 +0000514 if (gdict == NULL
515 || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000516 Py_DECREF(gdict);
517 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000518 return NULL;
519 }
520 return newregexobject(npattern, tran, pattern, gdict);
521}
522
523
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000524static PyObject *cache_pat;
525static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000526
527static int
528update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000529 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000530{
531 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000532 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000533 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000534 Py_XDECREF(cache_prog);
535 cache_prog = regex_compile((PyObject *)NULL, pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000536 if (cache_prog == NULL)
537 return -1;
538 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000539 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000540 }
541 return 0;
542}
543
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000544static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000545regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000546 PyObject *self;
547 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000548{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000549 PyObject *pat, *string;
550 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000551 return NULL;
552 if (update_cache(pat) < 0)
553 return NULL;
554 return reg_match((regexobject *)cache_prog, string);
555}
556
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000557static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000558regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000559 PyObject *self;
560 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000561{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000562 PyObject *pat, *string;
563 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000564 return NULL;
565 if (update_cache(pat) < 0)
566 return NULL;
567 return reg_search((regexobject *)cache_prog, string);
568}
569
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000570static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000571regex_set_syntax(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000572 PyObject *self, *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000573{
574 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000575 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000576 return NULL;
577 syntax = re_set_syntax(syntax);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000578 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000579}
580
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000581static struct PyMethodDef regex_global_methods[] = {
Guido van Rossum295d1711995-02-19 15:55:19 +0000582 {"compile", regex_compile, 0},
583 {"symcomp", regex_symcomp, 0},
584 {"match", regex_match, 0},
585 {"search", regex_search, 0},
586 {"set_syntax", regex_set_syntax, 0},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000587 {NULL, NULL} /* sentinel */
588};
589
Guido van Rossum8f3032d1996-08-19 22:03:12 +0000590void
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000591initregex()
592{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000593 PyObject *m, *d, *v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000594
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000595 m = Py_InitModule("regex", regex_global_methods);
596 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000597
598 /* Initialize regex.error exception */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000599 RegexError = PyString_FromString("regex.error");
600 if (RegexError == NULL || PyDict_SetItemString(d, "error", RegexError) != 0)
601 Py_FatalError("can't define regex.error");
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000602
603 /* Initialize regex.casefold constant */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000604 v = PyString_FromStringAndSize((char *)NULL, 256);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000605 if (v != NULL) {
606 int i;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000607 char *s = PyString_AsString(v);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000608 for (i = 0; i < 256; i++) {
609 if (isupper(i))
610 s[i] = tolower(i);
611 else
612 s[i] = i;
613 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000614 PyDict_SetItemString(d, "casefold", v);
615 Py_DECREF(v);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000616 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000617}