blob: 2455b0639c22589b5487d22ac2aba5d438fb36a3 [file] [log] [blame]
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00001/*
Guido van Rossum6f4c43d1991-12-30 01:42:57 +00002XXX support range parameter on search
3XXX support mstop parameter on search
4*/
5
6/***********************************************************
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007Copyright (c) 2000, BeOpen.com.
8Copyright (c) 1995-2000, Corporation for National Research Initiatives.
9Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
10All rights reserved.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000011
Guido van Rossumfd71b9e2000-06-30 23:50:40 +000012See the file "Misc/COPYRIGHT" for information on usage and
13redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000014******************************************************************/
15
16/* Regular expression objects */
Guido van Rossum1cab95c1992-01-19 16:31:57 +000017/* This uses Tatu Ylonen's copyleft-free reimplementation of
18 GNU regular expressions */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000019
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000020#include "Python.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000021
Guido van Rossuma376cc51996-12-05 23:43:35 +000022#include <ctype.h>
23
Guido van Rossum1cab95c1992-01-19 16:31:57 +000024#include "regexpr.h"
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000025
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000026static PyObject *RegexError; /* Exception */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000027
28typedef struct {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000029 PyObject_HEAD
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000030 struct re_pattern_buffer re_patbuf; /* The compiled expression */
31 struct re_registers re_regs; /* The registers from the last match */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000032 char re_fastmap[256]; /* Storage for fastmap */
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000033 PyObject *re_translate; /* String object for translate table */
34 PyObject *re_lastok; /* String object last matched/searched */
35 PyObject *re_groupindex; /* Group name to index dictionary */
36 PyObject *re_givenpat; /* Pattern with symbolic groups */
37 PyObject *re_realpat; /* Pattern without symbolic groups */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000038} regexobject;
39
40/* Regex object methods */
41
42static void
43reg_dealloc(re)
44 regexobject *re;
45{
Guido van Rossumb18618d2000-05-03 23:44:39 +000046 if (re->re_patbuf.buffer)
47 PyMem_DEL(re->re_patbuf.buffer);
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000048 Py_XDECREF(re->re_translate);
49 Py_XDECREF(re->re_lastok);
50 Py_XDECREF(re->re_groupindex);
51 Py_XDECREF(re->re_givenpat);
52 Py_XDECREF(re->re_realpat);
Guido van Rossumb18618d2000-05-03 23:44:39 +000053 PyObject_Del(re);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000054}
55
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000056static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000057makeresult(regs)
58 struct re_registers *regs;
59{
Guido van Rossumc1962021996-10-08 14:18:42 +000060 PyObject *v;
61 int i;
62 static PyObject *filler = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000063
Guido van Rossumc1962021996-10-08 14:18:42 +000064 if (filler == NULL) {
65 filler = Py_BuildValue("(ii)", -1, -1);
66 if (filler == NULL)
67 return NULL;
68 }
69 v = PyTuple_New(RE_NREGS);
70 if (v == NULL)
71 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +000072
Guido van Rossumc1962021996-10-08 14:18:42 +000073 for (i = 0; i < RE_NREGS; i++) {
74 int lo = regs->start[i];
75 int hi = regs->end[i];
76 PyObject *w;
77 if (lo == -1 && hi == -1) {
78 w = filler;
79 Py_INCREF(w);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000080 }
Guido van Rossumc1962021996-10-08 14:18:42 +000081 else
82 w = Py_BuildValue("(ii)", lo, hi);
Barry Warsawc3573251996-12-20 21:56:07 +000083 if (w == NULL || PyTuple_SetItem(v, i, w) < 0) {
84 Py_DECREF(v);
Guido van Rossumc1962021996-10-08 14:18:42 +000085 return NULL;
86 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000087 }
88 return v;
89}
90
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000091static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +000092regobj_match(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000093 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +000094 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000095{
Guido van Rossum4a807f51997-05-12 16:04:09 +000096 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +000097 char *buffer;
Guido van Rossumd577c0c1992-01-27 16:46:19 +000098 int size;
Barry Warsawc3573251996-12-20 21:56:07 +000099 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000100 int result;
Barry Warsawc3573251996-12-20 21:56:07 +0000101
Guido van Rossum43713e52000-02-29 13:59:29 +0000102 if (!PyArg_ParseTuple(args, "O|i:match", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +0000103 return NULL;
Guido van Rossum7e488981998-10-08 02:25:24 +0000104 if (!PyArg_Parse(argstring, "t#", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000105 return NULL;
106
Guido van Rossum36d330b1993-02-21 20:12:16 +0000107 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000108 PyErr_SetString(RegexError, "match offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000109 return NULL;
110 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000111 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000112 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000113 result = _Py_re_match(&re->re_patbuf, (unsigned char *)buffer, size, offset,
Guido van Rossum0318bd61997-08-14 14:35:12 +0000114 &re->re_regs);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000115 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000116 /* Serious failure of some sort; if re_match didn't
117 set an exception, raise a generic error */
118 if (!PyErr_Occurred())
119 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000120 return NULL;
121 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000122 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000123 Py_INCREF(argstring);
124 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000125 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000126 return PyInt_FromLong((long)result); /* Length of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000127}
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000128
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000129static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000130regobj_search(re, args)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000131 regexobject *re;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000132 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000133{
Guido van Rossum4a807f51997-05-12 16:04:09 +0000134 PyObject *argstring;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000135 char *buffer;
136 int size;
Barry Warsawc3573251996-12-20 21:56:07 +0000137 int offset = 0;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000138 int range;
139 int result;
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000140
Guido van Rossum43713e52000-02-29 13:59:29 +0000141 if (!PyArg_ParseTuple(args, "O|i:search", &argstring, &offset))
Guido van Rossum4a807f51997-05-12 16:04:09 +0000142 return NULL;
Guido van Rossum43713e52000-02-29 13:59:29 +0000143 if (!PyArg_Parse(argstring, "t#:search", &buffer, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000144 return NULL;
145
Guido van Rossum36d330b1993-02-21 20:12:16 +0000146 if (offset < 0 || offset > size) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000147 PyErr_SetString(RegexError, "search offset out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000148 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000149 }
Guido van Rossumd577c0c1992-01-27 16:46:19 +0000150 /* NB: In Emacs 18.57, the documentation for re_search[_2] and
151 the implementation don't match: the documentation states that
152 |range| positions are tried, while the code tries |range|+1
153 positions. It seems more productive to believe the code! */
Guido van Rossum2d785901992-01-26 18:12:41 +0000154 range = size - offset;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000155 Py_XDECREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000156 re->re_lastok = NULL;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000157 result = _Py_re_search(&re->re_patbuf, (unsigned char *)buffer, size, offset, range,
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000158 &re->re_regs);
159 if (result < -1) {
Guido van Rossum95e80531997-08-13 22:34:14 +0000160 /* Serious failure of some sort; if re_match didn't
161 set an exception, raise a generic error */
162 if (!PyErr_Occurred())
163 PyErr_SetString(RegexError, "match failure");
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000164 return NULL;
165 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000166 if (result >= 0) {
Guido van Rossum4a807f51997-05-12 16:04:09 +0000167 Py_INCREF(argstring);
168 re->re_lastok = argstring;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000169 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000170 return PyInt_FromLong((long)result); /* Position of the match or -1 */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000171}
172
Barry Warsawc3573251996-12-20 21:56:07 +0000173/* get the group from the regex where index can be a string (group name) or
174 an integer index [0 .. 99]
175 */
176static PyObject*
177group_from_index(re, index)
Guido van Rossum36d330b1993-02-21 20:12:16 +0000178 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000179 PyObject *index;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000180{
181 int i, a, b;
Barry Warsawc3573251996-12-20 21:56:07 +0000182 char *v;
183
184 if (PyString_Check(index))
185 if (re->re_groupindex == NULL ||
186 !(index = PyDict_GetItem(re->re_groupindex, index)))
187 {
188 PyErr_SetString(RegexError,
189 "group() group name doesn't exist");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000190 return NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000191 }
Barry Warsawc3573251996-12-20 21:56:07 +0000192
193 i = PyInt_AsLong(index);
194 if (i == -1 && PyErr_Occurred())
195 return NULL;
196
Guido van Rossum36d330b1993-02-21 20:12:16 +0000197 if (i < 0 || i >= RE_NREGS) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000198 PyErr_SetString(RegexError, "group() index out of range");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000199 return NULL;
200 }
201 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000202 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000203 "group() only valid after successful match/search");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000204 return NULL;
205 }
206 a = re->re_regs.start[i];
207 b = re->re_regs.end[i];
208 if (a < 0 || b < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000209 Py_INCREF(Py_None);
210 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000211 }
Barry Warsawc3573251996-12-20 21:56:07 +0000212
213 if (!(v = PyString_AsString(re->re_lastok)))
214 return NULL;
215
216 return PyString_FromStringAndSize(v+a, b-a);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000217}
218
Barry Warsawc3573251996-12-20 21:56:07 +0000219
220static PyObject *
221regobj_group(re, args)
222 regexobject *re;
223 PyObject *args;
224{
225 int n = PyTuple_Size(args);
226 int i;
227 PyObject *res = NULL;
228
229 if (n < 0)
230 return NULL;
231 if (n == 0) {
232 PyErr_SetString(PyExc_TypeError, "not enough arguments");
233 return NULL;
234 }
235 if (n == 1) {
236 /* return value is a single string */
237 PyObject *index = PyTuple_GetItem(args, 0);
238 if (!index)
239 return NULL;
240
241 return group_from_index(re, index);
242 }
243
244 /* return value is a tuple */
245 if (!(res = PyTuple_New(n)))
246 return NULL;
247
248 for (i = 0; i < n; i++) {
249 PyObject *index = PyTuple_GetItem(args, i);
250 PyObject *group = NULL;
251
252 if (!index)
253 goto finally;
254 if (!(group = group_from_index(re, index)))
255 goto finally;
256 if (PyTuple_SetItem(res, i, group) < 0)
257 goto finally;
258 }
259 return res;
260
261 finally:
262 Py_DECREF(res);
263 return NULL;
264}
265
266
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000267static struct PyMethodDef reg_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000268 {"match", (PyCFunction)regobj_match, 1},
269 {"search", (PyCFunction)regobj_search, 1},
270 {"group", (PyCFunction)regobj_group, 1},
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000271 {NULL, NULL} /* sentinel */
272};
273
Barry Warsawc3573251996-12-20 21:56:07 +0000274
275
276static char* members[] = {
277 "last", "regs", "translate",
278 "groupindex", "realpat", "givenpat",
279 NULL
280};
281
282
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000283static PyObject *
Barry Warsawc3573251996-12-20 21:56:07 +0000284regobj_getattr(re, name)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000285 regexobject *re;
286 char *name;
287{
Guido van Rossumb824fc61992-01-01 14:52:16 +0000288 if (strcmp(name, "regs") == 0) {
Guido van Rossum36d330b1993-02-21 20:12:16 +0000289 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000290 Py_INCREF(Py_None);
291 return Py_None;
Guido van Rossumb824fc61992-01-01 14:52:16 +0000292 }
293 return makeresult(&re->re_regs);
294 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000295 if (strcmp(name, "last") == 0) {
296 if (re->re_lastok == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000297 Py_INCREF(Py_None);
298 return Py_None;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000299 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000300 Py_INCREF(re->re_lastok);
Guido van Rossum36d330b1993-02-21 20:12:16 +0000301 return re->re_lastok;
302 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000303 if (strcmp(name, "translate") == 0) {
304 if (re->re_translate == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000305 Py_INCREF(Py_None);
306 return Py_None;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000307 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000308 Py_INCREF(re->re_translate);
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000309 return re->re_translate;
310 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000311 if (strcmp(name, "groupindex") == 0) {
312 if (re->re_groupindex == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000313 Py_INCREF(Py_None);
314 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000315 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000316 Py_INCREF(re->re_groupindex);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000317 return re->re_groupindex;
318 }
319 if (strcmp(name, "realpat") == 0) {
320 if (re->re_realpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000321 Py_INCREF(Py_None);
322 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000323 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000324 Py_INCREF(re->re_realpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000325 return re->re_realpat;
326 }
327 if (strcmp(name, "givenpat") == 0) {
328 if (re->re_givenpat == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000329 Py_INCREF(Py_None);
330 return Py_None;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000331 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000332 Py_INCREF(re->re_givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000333 return re->re_givenpat;
334 }
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000335 if (strcmp(name, "__members__") == 0) {
Barry Warsawc3573251996-12-20 21:56:07 +0000336 int i = 0;
337 PyObject *list = NULL;
338
339 /* okay, so it's unlikely this list will change that often.
340 still, it's easier to change it in just one place.
341 */
342 while (members[i])
343 i++;
344 if (!(list = PyList_New(i)))
345 return NULL;
346
347 i = 0;
348 while (members[i]) {
349 PyObject* v = PyString_FromString(members[i]);
350 if (!v || PyList_SetItem(list, i, v) < 0) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000351 Py_DECREF(list);
Barry Warsawc3573251996-12-20 21:56:07 +0000352 return NULL;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000353 }
Barry Warsawc3573251996-12-20 21:56:07 +0000354 i++;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000355 }
356 return list;
357 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000358 return Py_FindMethod(reg_methods, (PyObject *)re, name);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000359}
360
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000361static PyTypeObject Regextype = {
362 PyObject_HEAD_INIT(&PyType_Type)
Barry Warsawc3573251996-12-20 21:56:07 +0000363 0, /*ob_size*/
364 "regex", /*tp_name*/
365 sizeof(regexobject), /*tp_size*/
366 0, /*tp_itemsize*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000367 /* methods */
Barry Warsawc3573251996-12-20 21:56:07 +0000368 (destructor)reg_dealloc, /*tp_dealloc*/
369 0, /*tp_print*/
370 (getattrfunc)regobj_getattr, /*tp_getattr*/
371 0, /*tp_setattr*/
372 0, /*tp_compare*/
373 0, /*tp_repr*/
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000374};
375
Barry Warsawc3573251996-12-20 21:56:07 +0000376/* reference counting invariants:
377 pattern: borrowed
378 translate: borrowed
379 givenpat: borrowed
380 groupindex: transferred
381*/
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000382static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000383newregexobject(pattern, translate, givenpat, groupindex)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000384 PyObject *pattern;
385 PyObject *translate;
386 PyObject *givenpat;
387 PyObject *groupindex;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000388{
389 regexobject *re;
Barry Warsawc3573251996-12-20 21:56:07 +0000390 char *pat;
391 int size;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000392
Guido van Rossum7e488981998-10-08 02:25:24 +0000393 if (!PyArg_Parse(pattern, "t#", &pat, &size))
Barry Warsawc3573251996-12-20 21:56:07 +0000394 return NULL;
395
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000396 if (translate != NULL && PyString_Size(translate) != 256) {
397 PyErr_SetString(RegexError,
Barry Warsawc3573251996-12-20 21:56:07 +0000398 "translation table must be 256 bytes");
Guido van Rossum36d330b1993-02-21 20:12:16 +0000399 return NULL;
400 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000401 re = PyObject_New(regexobject, &Regextype);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000402 if (re != NULL) {
403 char *error;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000404 re->re_patbuf.buffer = NULL;
405 re->re_patbuf.allocated = 0;
Guido van Rossumed2554a1997-08-18 15:31:24 +0000406 re->re_patbuf.fastmap = (unsigned char *)re->re_fastmap;
Barry Warsawc3573251996-12-20 21:56:07 +0000407 if (translate) {
Guido van Rossumed2554a1997-08-18 15:31:24 +0000408 re->re_patbuf.translate = (unsigned char *)PyString_AsString(translate);
Barry Warsawc3573251996-12-20 21:56:07 +0000409 if (!re->re_patbuf.translate)
410 goto finally;
411 Py_INCREF(translate);
412 }
Guido van Rossum36d330b1993-02-21 20:12:16 +0000413 else
414 re->re_patbuf.translate = NULL;
Guido van Rossum36d330b1993-02-21 20:12:16 +0000415 re->re_translate = translate;
416 re->re_lastok = NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000417 re->re_groupindex = groupindex;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000418 Py_INCREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000419 re->re_realpat = pattern;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000420 Py_INCREF(givenpat);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000421 re->re_givenpat = givenpat;
Guido van Rossumd19c04a1997-09-03 00:47:36 +0000422 error = _Py_re_compile_pattern((unsigned char *)pat, size, &re->re_patbuf);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000423 if (error != NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000424 PyErr_SetString(RegexError, error);
Barry Warsawc3573251996-12-20 21:56:07 +0000425 goto finally;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000426 }
427 }
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000428 return (PyObject *)re;
Barry Warsawc3573251996-12-20 21:56:07 +0000429 finally:
430 Py_DECREF(re);
431 return NULL;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000432}
433
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000434static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000435regex_compile(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000436 PyObject *self;
437 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000438{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000439 PyObject *pat = NULL;
440 PyObject *tran = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000441
Guido van Rossum43713e52000-02-29 13:59:29 +0000442 if (!PyArg_ParseTuple(args, "S|S:compile", &pat, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000443 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000444 return newregexobject(pat, tran, pat, NULL);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000445}
446
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000447static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000448symcomp(pattern, gdict)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000449 PyObject *pattern;
450 PyObject *gdict;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000451{
Barry Warsawc3573251996-12-20 21:56:07 +0000452 char *opat, *oend, *o, *n, *g, *v;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000453 int group_count = 0;
Barry Warsawc3573251996-12-20 21:56:07 +0000454 int sz;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000455 int escaped = 0;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000456 char name_buf[128];
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000457 PyObject *npattern;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000458 int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
459
Barry Warsawc3573251996-12-20 21:56:07 +0000460 if (!(opat = PyString_AsString(pattern)))
461 return NULL;
462
463 if ((sz = PyString_Size(pattern)) < 0)
464 return NULL;
465
466 oend = opat + sz;
467 o = opat;
468
Guido van Rossumab28c561996-06-11 18:33:14 +0000469 if (oend == opat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000470 Py_INCREF(pattern);
Guido van Rossumab28c561996-06-11 18:33:14 +0000471 return pattern;
472 }
473
Barry Warsawc3573251996-12-20 21:56:07 +0000474 if (!(npattern = PyString_FromStringAndSize((char*)NULL, sz)) ||
475 !(n = PyString_AsString(npattern)))
Guido van Rossumb6775db1994-08-01 11:34:53 +0000476 return NULL;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000477
478 while (o < oend) {
479 if (*o == '(' && escaped == require_escape) {
480 char *backtrack;
481 escaped = 0;
482 ++group_count;
483 *n++ = *o;
484 if (++o >= oend || *o != '<')
485 continue;
486 /* *o == '<' */
487 if (o+1 < oend && *(o+1) == '>')
488 continue;
489 backtrack = o;
490 g = name_buf;
491 for (++o; o < oend;) {
492 if (*o == '>') {
Barry Warsawc3573251996-12-20 21:56:07 +0000493 PyObject *group_name = NULL;
494 PyObject *group_index = NULL;
495 *g++ = '\0';
496 group_name = PyString_FromString(name_buf);
497 group_index = PyInt_FromLong(group_count);
498 if (group_name == NULL ||
499 group_index == NULL ||
500 PyDict_SetItem(gdict, group_name,
501 group_index) != 0)
502 {
503 Py_XDECREF(group_name);
504 Py_XDECREF(group_index);
505 Py_XDECREF(npattern);
506 return NULL;
507 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000508 Py_DECREF(group_name);
509 Py_DECREF(group_index);
Barry Warsawc3573251996-12-20 21:56:07 +0000510 ++o; /* eat the '>' */
511 break;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000512 }
Guido van Rossum7f7f2741995-02-10 17:01:56 +0000513 if (!isalnum(Py_CHARMASK(*o)) && *o != '_') {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000514 o = backtrack;
515 break;
516 }
517 *g++ = *o++;
518 }
519 }
Guido van Rossum0cbaff41996-10-23 17:53:06 +0000520 else if (*o == '[' && !escaped) {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000521 *n++ = *o;
Barry Warsawc3573251996-12-20 21:56:07 +0000522 ++o; /* eat the char following '[' */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000523 *n++ = *o;
524 while (o < oend && *o != ']') {
525 ++o;
526 *n++ = *o;
527 }
528 if (o < oend)
529 ++o;
530 }
531 else if (*o == '\\') {
532 escaped = 1;
533 *n++ = *o;
534 ++o;
535 }
536 else {
537 escaped = 0;
538 *n++ = *o;
539 ++o;
540 }
541 }
542
Barry Warsawc3573251996-12-20 21:56:07 +0000543 if (!(v = PyString_AsString(npattern))) {
544 Py_DECREF(npattern);
545 return NULL;
546 }
547 /* _PyString_Resize() decrements npattern on failure */
548 if (_PyString_Resize(&npattern, n - v) == 0)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000549 return npattern;
550 else {
Guido van Rossumb6775db1994-08-01 11:34:53 +0000551 return NULL;
552 }
553
554}
555
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000556static PyObject *
Guido van Rossumb6775db1994-08-01 11:34:53 +0000557regex_symcomp(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000558 PyObject *self;
559 PyObject *args;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000560{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000561 PyObject *pattern;
562 PyObject *tran = NULL;
563 PyObject *gdict = NULL;
564 PyObject *npattern;
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000565 PyObject *retval = NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000566
Guido van Rossum43713e52000-02-29 13:59:29 +0000567 if (!PyArg_ParseTuple(args, "S|S:symcomp", &pattern, &tran))
Barry Warsawc3573251996-12-20 21:56:07 +0000568 return NULL;
569
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000570 gdict = PyDict_New();
Barry Warsawc3573251996-12-20 21:56:07 +0000571 if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000572 Py_DECREF(gdict);
573 Py_DECREF(pattern);
Guido van Rossumb6775db1994-08-01 11:34:53 +0000574 return NULL;
575 }
Barry Warsaw4bc9d391997-01-09 22:22:05 +0000576 retval = newregexobject(npattern, tran, pattern, gdict);
577 Py_DECREF(npattern);
578 return retval;
Guido van Rossumb6775db1994-08-01 11:34:53 +0000579}
580
581
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000582static PyObject *cache_pat;
583static PyObject *cache_prog;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000584
585static int
586update_cache(pat)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000587 PyObject *pat;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000588{
Barry Warsawc3573251996-12-20 21:56:07 +0000589 PyObject *tuple = Py_BuildValue("(O)", pat);
590 int status = 0;
591
592 if (!tuple)
593 return -1;
594
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000595 if (pat != cache_pat) {
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000596 Py_XDECREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000597 cache_pat = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000598 Py_XDECREF(cache_prog);
Barry Warsawc3573251996-12-20 21:56:07 +0000599 cache_prog = regex_compile((PyObject *)NULL, tuple);
600 if (cache_prog == NULL) {
601 status = -1;
602 goto finally;
603 }
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000604 cache_pat = pat;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000605 Py_INCREF(cache_pat);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000606 }
Barry Warsawc3573251996-12-20 21:56:07 +0000607 finally:
608 Py_DECREF(tuple);
609 return status;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000610}
611
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000612static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000613regex_match(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000614 PyObject *self;
615 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000616{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000617 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000618 PyObject *tuple, *v;
619
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000620 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000621 return NULL;
622 if (update_cache(pat) < 0)
623 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000624
625 if (!(tuple = Py_BuildValue("(S)", string)))
626 return NULL;
627 v = regobj_match((regexobject *)cache_prog, tuple);
628 Py_DECREF(tuple);
629 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000630}
631
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000632static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000633regex_search(self, args)
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000634 PyObject *self;
635 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000636{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000637 PyObject *pat, *string;
Barry Warsawc3573251996-12-20 21:56:07 +0000638 PyObject *tuple, *v;
639
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000640 if (!PyArg_Parse(args, "(SS)", &pat, &string))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000641 return NULL;
642 if (update_cache(pat) < 0)
643 return NULL;
Barry Warsawc3573251996-12-20 21:56:07 +0000644
645 if (!(tuple = Py_BuildValue("(S)", string)))
646 return NULL;
647 v = regobj_search((regexobject *)cache_prog, tuple);
648 Py_DECREF(tuple);
649 return v;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000650}
651
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000652static PyObject *
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000653regex_set_syntax(self, args)
Barry Warsawc3573251996-12-20 21:56:07 +0000654 PyObject *self;
655 PyObject *args;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000656{
657 int syntax;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000658 if (!PyArg_Parse(args, "i", &syntax))
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000659 return NULL;
660 syntax = re_set_syntax(syntax);
Barry Warsawc3573251996-12-20 21:56:07 +0000661 /* wipe the global pattern cache */
662 Py_XDECREF(cache_pat);
663 cache_pat = NULL;
664 Py_XDECREF(cache_prog);
665 cache_prog = NULL;
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000666 return PyInt_FromLong((long)syntax);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000667}
668
Barry Warsaw909d7c31997-02-18 18:48:50 +0000669static PyObject *
670regex_get_syntax(self, args)
671 PyObject *self;
672 PyObject *args;
673{
674 if (!PyArg_Parse(args, ""))
675 return NULL;
676 return PyInt_FromLong((long)re_syntax);
677}
678
679
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000680static struct PyMethodDef regex_global_methods[] = {
Barry Warsawc3573251996-12-20 21:56:07 +0000681 {"compile", regex_compile, 1},
682 {"symcomp", regex_symcomp, 1},
Guido van Rossum295d1711995-02-19 15:55:19 +0000683 {"match", regex_match, 0},
684 {"search", regex_search, 0},
685 {"set_syntax", regex_set_syntax, 0},
Barry Warsaw909d7c31997-02-18 18:48:50 +0000686 {"get_syntax", regex_get_syntax, 0},
Barry Warsawc3573251996-12-20 21:56:07 +0000687 {NULL, NULL} /* sentinel */
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000688};
689
Guido van Rossum3886bb61998-12-04 18:50:17 +0000690DL_EXPORT(void)
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000691initregex()
692{
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000693 PyObject *m, *d, *v;
Barry Warsawc3573251996-12-20 21:56:07 +0000694 int i;
695 char *s;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000696
Guido van Rossumdfe8ad91996-07-24 00:51:20 +0000697 m = Py_InitModule("regex", regex_global_methods);
698 d = PyModule_GetDict(m);
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000699
700 /* Initialize regex.error exception */
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000701 v = RegexError = PyErr_NewException("regex.error", NULL, NULL);
Barry Warsawc3573251996-12-20 21:56:07 +0000702 if (v == NULL || PyDict_SetItemString(d, "error", v) != 0)
703 goto finally;
704
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000705 /* Initialize regex.casefold constant */
Barry Warsawc3573251996-12-20 21:56:07 +0000706 if (!(v = PyString_FromStringAndSize((char *)NULL, 256)))
707 goto finally;
708
709 if (!(s = PyString_AsString(v)))
710 goto finally;
711
712 for (i = 0; i < 256; i++) {
713 if (isupper(i))
714 s[i] = tolower(i);
715 else
716 s[i] = i;
Guido van Rossumccd5bad1993-02-23 13:42:39 +0000717 }
Barry Warsawc3573251996-12-20 21:56:07 +0000718 if (PyDict_SetItemString(d, "casefold", v) < 0)
719 goto finally;
720 Py_DECREF(v);
721
722 if (!PyErr_Occurred())
723 return;
724 finally:
Guido van Rossum0cb96de1997-10-01 04:29:29 +0000725 /* Nothing */ ;
Guido van Rossum6f4c43d1991-12-30 01:42:57 +0000726}