blob: d08d4ea1dd37a365bf4126e9c2beaed4bd8a38e9 [file] [log] [blame]
Guido van Rossum51b3aa31997-10-06 14:43:11 +00001/* Pcre objects */
2
3#include "Python.h"
4
5#ifndef Py_eval_input
6/* For Python 1.4, graminit.h has to be explicitly included */
7#include "graminit.h"
8#define Py_eval_input eval_input
9#endif
10
11#ifndef FOR_PYTHON
12#define FOR_PYTHON
13#endif
14
15#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000016#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000017
18static PyObject *ErrorObject;
19
20typedef struct {
21 PyObject_HEAD
22 pcre *regex;
23 pcre_extra *regex_extra;
24 int num_groups;
25} PcreObject;
26
27staticforward PyTypeObject Pcre_Type;
28
29#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
30#define NORMAL 0
31#define CHARCLASS 1
32#define REPLACEMENT 2
33
34#define CHAR 0
35#define MEMORY_REFERENCE 1
36#define SYNTAX 2
37#define NOT_SYNTAX 3
38#define SET 4
39#define WORD_BOUNDARY 5
40#define NOT_WORD_BOUNDARY 6
41#define BEGINNING_OF_BUFFER 7
42#define END_OF_BUFFER 8
Guido van Rossum042ff9e1998-04-03 21:13:31 +000043#define STRING 9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000044
45static PcreObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000046newPcreObject(PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000047{
48 PcreObject *self;
Guido van Rossumb18618d2000-05-03 23:44:39 +000049 self = PyObject_New(PcreObject, &Pcre_Type);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000050 if (self == NULL)
51 return NULL;
52 self->regex = NULL;
53 self->regex_extra = NULL;
54 return self;
55}
56
57/* Pcre methods */
58
59static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000060PyPcre_dealloc(PcreObject *self)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000061{
Andrew M. Kuchling0c7822e2000-02-18 18:30:01 +000062 if (self->regex) (pcre_free)(self->regex);
63 if (self->regex_extra) (pcre_free)(self->regex_extra);
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_Del(self);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000065}
66
67
68static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000069PyPcre_exec(PcreObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000070{
Guido van Rossum50700601997-12-08 17:15:20 +000071 char *string;
72 int stringlen, pos = 0, options=0, endpos = -1, i, count;
73 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +000074 PyObject *list;
75
Andrew M. Kuchling888aa262001-06-18 19:04:04 +000076 if (!PyArg_ParseTuple(args, "t#|iii:match", &string, &stringlen,
77 &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +000078 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +000079 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +000080 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +000081 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +000082 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +000083 /* If an error occurred during the match, and an exception was raised,
84 just return NULL and leave the exception alone. The most likely
85 problem to cause this would be running out of memory for
86 the failure stack. */
87 if (PyErr_Occurred())
88 {
89 return NULL;
90 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +000091 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
92 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +000093 {
Guido van Rossum58132c61997-12-17 00:24:13 +000094 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
95 PyErr_SetObject(ErrorObject, errval);
96 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +000097 return NULL;
98 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +000099
100 list=PyList_New(self->num_groups+1);
101 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000102 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000103 {
104 PyObject *v;
105 int start=offsets[i*2], end=offsets[i*2+1];
106 /* If the group wasn't affected by the match, return -1, -1 */
107 if (start<0 || count<=i)
108 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000109 v=Py_BuildValue("ii", start, end);
110 if (v==NULL) {Py_DECREF(list); return NULL;}
111 PyList_SetItem(list, i, v);
112 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000113 return list;
114}
115
116static PyMethodDef Pcre_methods[] = {
117 {"match", (PyCFunction)PyPcre_exec, 1},
118 {NULL, NULL} /* sentinel */
119};
120
121static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000122PyPcre_getattr(PcreObject *self, char *name)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000123{
124 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
125}
126
127
128staticforward PyTypeObject Pcre_Type = {
Fred Drake0d40ba42000-02-04 20:33:49 +0000129 PyObject_HEAD_INIT(NULL)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000130 0, /*ob_size*/
Guido van Rossum14648392001-12-08 18:02:58 +0000131 "pcre.Pcre", /*tp_name*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000132 sizeof(PcreObject), /*tp_basicsize*/
133 0, /*tp_itemsize*/
134 /* methods */
135 (destructor)PyPcre_dealloc, /*tp_dealloc*/
136 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000137 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000138 0, /*tp_setattr*/
139 0, /*tp_compare*/
140 0, /*tp_repr*/
141 0, /*tp_as_number*/
142 0, /*tp_as_sequence*/
143 0, /*tp_as_mapping*/
144 0, /*tp_hash*/
145};
146/* --------------------------------------------------------------------- */
147
148static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000149PyPcre_compile(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000150{
151 PcreObject *rv;
152 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000153 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000154 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000155
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000156 int options, erroroffset;
Guido van Rossum43713e52000-02-29 13:59:29 +0000157 if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000158 &PyDict_Type, &dictionary))
159 return NULL;
160 rv = newPcreObject(args);
161 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000162 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000163
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000164 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000165 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000166 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000167 {
Guido van Rossumc4428c52000-04-25 15:59:32 +0000168 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000169 if (!PyErr_Occurred())
170 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000171 PyObject *errval = Py_BuildValue("si", error, erroroffset);
172 PyErr_SetObject(ErrorObject, errval);
173 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000174 }
175 return NULL;
176 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000177 rv->regex_extra=pcre_study(rv->regex, 0, &error);
178 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000179 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000180 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000181 Py_DECREF(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000182 PyErr_SetObject(ErrorObject, errval);
183 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000184 return NULL;
185 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000186 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
187 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000188 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000189 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
190 PyErr_SetObject(ErrorObject, errval);
191 Py_XDECREF(errval);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000192 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000193 return NULL;
194 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000195 return (PyObject *)rv;
196}
197
198static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000199PyPcre_expand_escape(unsigned char *pattern, int pattern_len,
200 int *indexptr, int *typeptr)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000201{
Guido van Rossum50700601997-12-08 17:15:20 +0000202 unsigned char c;
203 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000204
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000205 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000206 {
207 PyErr_SetString(ErrorObject, "escape ends too soon");
208 return NULL;
209 }
210 c=pattern[index]; index++;
211 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000212
Guido van Rossum50700601997-12-08 17:15:20 +0000213 switch (c)
214 {
215 case('t'):
216 *indexptr=index;
217 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000218 case('n'):
219 *indexptr = index;
220 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000221 case('v'):
222 *indexptr = index;
223 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000224 case('r'):
225 *indexptr = index;
226 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000227 case('f'):
228 *indexptr = index;
229 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000230 case('a'):
231 *indexptr = index;
232 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000233 case('b'):
234 *indexptr=index;
235 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000236 case('\\'):
237 *indexptr=index;
238 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000239
Guido van Rossum50700601997-12-08 17:15:20 +0000240 case('x'):
241 {
242 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000243
Guido van Rossum50700601997-12-08 17:15:20 +0000244 x = 0; end = index;
245 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
246 {
247 ch = pattern[end];
248 x = x * 16 + pcre_lcc[ch] -
249 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
250 x &= 255;
251 end++;
252 }
253 if (end==index)
254 {
255 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
256 return NULL;
257 }
258 *indexptr = end;
259 return Py_BuildValue("c", (char)x);
260 }
Guido van Rossum50700601997-12-08 17:15:20 +0000261
262 case('E'): case('G'): case('L'): case('Q'):
263 case('U'): case('l'): case('u'):
264 {
265 char message[50];
Tim Peters885d4572001-11-28 20:27:42 +0000266 PyOS_snprintf(message, sizeof(message),
267 "\\%c is not allowed", c);
Guido van Rossum50700601997-12-08 17:15:20 +0000268 PyErr_SetString(ErrorObject, message);
269 return NULL;
270 }
271
272 case('g'):
273 {
274 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000275 int group_num = 0, is_number=0;
276
Guido van Rossum50700601997-12-08 17:15:20 +0000277 if (pattern_len<=index)
278 {
279 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
280 return NULL;
281 }
282 if (pattern[index]!='<')
283 {
284 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
285 return NULL;
286 }
287 index++;
288 end=index;
289 while (end<pattern_len && pattern[end]!='>')
290 end++;
291 if (end==pattern_len)
292 {
293 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
294 return NULL;
295 }
296
297 if (index==end) /* Zero-length name */
298 {
299 /* XXX should include the text of the reference */
300 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
301 return NULL;
302 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000303 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000304 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000305 is_number = 1;
306 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000307 }
308
309 for(i=index+1; i<end; i++)
310 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000311 if (is_number &&
312 !(pcre_ctypes[pattern[i]] & ctype_digit) )
313 {
314 /* XXX should include the text of the reference */
315 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
316 return NULL;
317 }
318 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000319 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
320 {
321 /* XXX should include the text of the reference */
322 PyErr_SetString(ErrorObject, "illegal symbolic reference");
323 return NULL;
324 }
325 }
326
327 *typeptr = MEMORY_REFERENCE;
328 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000329 /* If it's a number, return the integer value of the group */
330 if (is_number) return Py_BuildValue("i", group_num);
331 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000332 return Py_BuildValue("s#", pattern+index, end-index);
333 }
Guido van Rossum50700601997-12-08 17:15:20 +0000334
335 case('0'):
336 {
337 /* \0 always indicates an octal escape, so we consume up to 3
338 characters, as long as they're all octal digits */
339 int octval=0, i;
340 index--;
341 for(i=index;
342 i<=index+2 && i<pattern_len
343 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
344 i++)
345 {
346 octval = octval * 8 + pattern[i] - '0';
347 }
348 if (octval>255)
349 {
350 PyErr_SetString(ErrorObject, "octal value out of range");
351 return NULL;
352 }
353 *indexptr = i;
354 return Py_BuildValue("c", (unsigned char)octval);
355 }
Fred Drakedff3a372001-07-19 21:29:49 +0000356
Guido van Rossum50700601997-12-08 17:15:20 +0000357 case('1'): case('2'): case('3'): case('4'):
358 case('5'): case('6'): case('7'): case('8'):
359 case('9'):
360 {
361 /* Handle \?, where ? is from 1 through 9 */
362 int value=0;
363 index--;
364 /* If it's at least a two-digit reference, like \34, it might
365 either be a 3-digit octal escape (\123) or a 2-digit
366 decimal memory reference (\34) */
367
368 if ( (index+1) <pattern_len &&
369 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
370 {
371 if ( (index+2) <pattern_len &&
372 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
373 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
374 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
375 )
376 {
377 /* 3 octal digits */
378 value= 8*8*(pattern[index ]-'0') +
379 8*(pattern[index+1]-'0') +
380 (pattern[index+2]-'0');
381 if (value>255)
382 {
383 PyErr_SetString(ErrorObject, "octal value out of range");
384 return NULL;
385 }
386 *indexptr = index+3;
387 return Py_BuildValue("c", (unsigned char)value);
388 }
389 else
390 {
391 /* 2-digit form, so it's a memory reference */
392 value= 10*(pattern[index ]-'0') +
393 (pattern[index+1]-'0');
394 if (value<1 || EXTRACT_MAX<=value)
395 {
396 PyErr_SetString(ErrorObject, "memory reference out of range");
397 return NULL;
398 }
399 *typeptr = MEMORY_REFERENCE;
400 *indexptr = index+2;
401 return Py_BuildValue("i", value);
402 }
403 }
404 else
405 {
406 /* Single-digit form, like \2, so it's a memory reference */
407 *typeptr = MEMORY_REFERENCE;
408 *indexptr = index+1;
409 return Py_BuildValue("i", pattern[index]-'0');
410 }
411 }
Guido van Rossum50700601997-12-08 17:15:20 +0000412
413 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000414 /* It's some unknown escape like \s, so return a string containing
415 \s */
416 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000417 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000418 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000419 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000420}
421
422static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000423PyPcre_expand(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000424{
Guido van Rossum50700601997-12-08 17:15:20 +0000425 PyObject *results, *match_obj;
426 PyObject *repl_obj, *newstring;
427 unsigned char *repl;
428 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000429
Guido van Rossum43713e52000-02-29 13:59:29 +0000430 if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj))
Guido van Rossum50700601997-12-08 17:15:20 +0000431 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000432
Guido van Rossum50700601997-12-08 17:15:20 +0000433 repl=(unsigned char *)PyString_AsString(repl_obj);
434 size=PyString_Size(repl_obj);
435 results=PyList_New(0);
436 if (results==NULL) return NULL;
437 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000438 {
Guido van Rossum50700601997-12-08 17:15:20 +0000439 if (repl[i]=='\\')
440 {
441 PyObject *value;
442 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000443
Guido van Rossum50700601997-12-08 17:15:20 +0000444 if (start!=i)
445 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000446 int status;
447 PyObject *s = PyString_FromStringAndSize(
448 (char *)repl+start, i-start);
449 if (s == NULL) {
450 Py_DECREF(results);
451 return NULL;
452 }
453 status = PyList_Append(results, s);
454 Py_DECREF(s);
455 if (status < 0) {
456 Py_DECREF(results);
457 return NULL;
458 }
Guido van Rossum50700601997-12-08 17:15:20 +0000459 total_len += i-start;
460 }
461 i++;
462 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
463 if (value==NULL)
464 {
465 /* PyPcre_expand_escape triggered an exception of some sort,
466 so just return */
467 Py_DECREF(results);
468 return NULL;
469 }
470 switch (escape_type)
471 {
472 case (CHAR):
473 PyList_Append(results, value);
474 total_len += PyString_Size(value);
475 break;
476 case(MEMORY_REFERENCE):
477 {
478 PyObject *r, *tuple, *result;
479 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000480 if (r == NULL) {
481 Py_DECREF(results);
482 return NULL;
483 }
Guido van Rossum50700601997-12-08 17:15:20 +0000484 tuple=PyTuple_New(1);
485 Py_INCREF(value);
486 PyTuple_SetItem(tuple, 0, value);
487 result=PyEval_CallObject(r, tuple);
488 Py_DECREF(r); Py_DECREF(tuple);
489 if (result==NULL)
490 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000491 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000492 Py_DECREF(results);
493 Py_DECREF(value);
494 return NULL;
495 }
496 if (result==Py_None)
497 {
498 char message[50];
Tim Peters885d4572001-11-28 20:27:42 +0000499 PyOS_snprintf(message, sizeof(message),
Guido van Rossum50700601997-12-08 17:15:20 +0000500 "group did not contribute to the match");
501 PyErr_SetString(ErrorObject,
502 message);
503 Py_DECREF(result);
504 Py_DECREF(value);
505 Py_DECREF(results);
506 return NULL;
507 }
508 /* typecheck that it's a string! */
509 if (!PyString_Check(result))
510 {
511 Py_DECREF(results);
512 Py_DECREF(result);
513 PyErr_SetString(ErrorObject,
514 "group() must return a string value for replacement");
515 return NULL;
516 }
517 PyList_Append(results, result);
518 total_len += PyString_Size(result);
519 Py_DECREF(result);
520 }
521 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000522 case(STRING):
523 {
524 PyList_Append(results, value);
525 total_len += PyString_Size(value);
526 break;
527 }
Guido van Rossum50700601997-12-08 17:15:20 +0000528 default:
529 Py_DECREF(results);
530 PyErr_SetString(ErrorObject,
531 "bad escape in replacement");
532 return NULL;
533 }
534 Py_DECREF(value);
535 start=i;
536 i--; /* Decrement now, because the 'for' loop will increment it */
537 }
538 } /* endif repl[i]!='\\' */
539
540 if (start!=i)
541 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000542 int status;
543 PyObject *s = PyString_FromStringAndSize((char *)repl+start,
544 i-start);
545 if (s == NULL) {
546 Py_DECREF(results);
547 return NULL;
548 }
549 status = PyList_Append(results, s);
550 Py_DECREF(s);
551 if (status < 0) {
552 Py_DECREF(results);
553 return NULL;
554 }
Guido van Rossum50700601997-12-08 17:15:20 +0000555 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000556 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000557
Guido van Rossum50700601997-12-08 17:15:20 +0000558 /* Whew! Now we've constructed a list containing various pieces of
559 strings that will make up our final result. So, iterate over
560 the list concatenating them. A new string measuring total_len
561 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000562
Guido van Rossum50700601997-12-08 17:15:20 +0000563 newstring=PyString_FromStringAndSize(NULL, total_len);
564 if (newstring==NULL)
565 {
566 Py_DECREF(results);
567 return NULL;
568 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000569
Guido van Rossum50700601997-12-08 17:15:20 +0000570 repl=(unsigned char *)PyString_AsString(newstring);
571 for (pos=i=0; i<PyList_Size(results); i++)
572 {
573 PyObject *item=PyList_GetItem(results, i);
574 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
575 pos += PyString_Size(item);
576 }
577 Py_DECREF(results);
578 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000579}
580
581
582/* List of functions defined in the module */
583
584static PyMethodDef pcre_methods[] = {
585 {"pcre_compile", PyPcre_compile, 1},
586 {"pcre_expand", PyPcre_expand, 1},
587 {NULL, NULL} /* sentinel */
588};
589
590
591/*
592 * Convenience routine to export an integer value.
593 * For simplicity, errors (which are unlikely anyway) are ignored.
594 */
595
596static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000597insint(PyObject *d, char *name, int value)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000598{
599 PyObject *v = PyInt_FromLong((long) value);
600 if (v == NULL) {
601 /* Don't bother reporting this error */
602 PyErr_Clear();
603 }
604 else {
605 PyDict_SetItemString(d, name, v);
606 Py_DECREF(v);
607 }
608}
609
610
611/* Initialization function for the module (*must* be called initpcre) */
612
Guido van Rossum3886bb61998-12-04 18:50:17 +0000613DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000614initpcre(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000615{
616 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000617
Fred Drake0d40ba42000-02-04 20:33:49 +0000618 Pcre_Type.ob_type = &PyType_Type;
619
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000620 /* Create the module and add the functions */
621 m = Py_InitModule("pcre", pcre_methods);
622
623 /* Add some symbolic constants to the module */
624 d = PyModule_GetDict(m);
Fred Drake589c35b2000-07-06 19:38:49 +0000625 ErrorObject = PyErr_NewException("pcre.error", NULL, NULL);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000626 PyDict_SetItemString(d, "error", ErrorObject);
627
628 /* Insert the flags */
629 insint(d, "IGNORECASE", PCRE_CASELESS);
630 insint(d, "ANCHORED", PCRE_ANCHORED);
631 insint(d, "MULTILINE", PCRE_MULTILINE);
632 insint(d, "DOTALL", PCRE_DOTALL);
633 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000634 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000635}
636