blob: e34c002b039eeda57cf2133a4d3b40e5b7684553 [file] [log] [blame]
Guido van Rossum51b3aa31997-10-06 14:43:11 +00001/* Pcre objects */
2
3#include "Python.h"
4
5#ifndef Py_eval_input
6/* For Python 1.4, graminit.h has to be explicitly included */
7#include "graminit.h"
8#define Py_eval_input eval_input
9#endif
10
11#ifndef FOR_PYTHON
12#define FOR_PYTHON
13#endif
14
15#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000016#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000017
/* Exception object used to report regex errors from this module.
   It is created in initpcre() under the name "pcre.error". */
static PyObject *ErrorObject;

/* Python object wrapping one compiled regular expression. */
typedef struct {
    PyObject_HEAD
    pcre *regex;                /* result of pcre_compile(); freed in PyPcre_dealloc() */
    pcre_extra *regex_extra;    /* result of pcre_study(); may be NULL */
    int num_groups;             /* value returned by pcre_info() in PyPcre_compile() */
} PcreObject;

staticforward PyTypeObject Pcre_Type;

/* True when v is exactly a PcreObject (no subclass check). */
#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)

/* NOTE(review): NORMAL, CHARCLASS and REPLACEMENT are not referenced by the
   code visible in this file -- confirm before removing. */
#define NORMAL 0
#define CHARCLASS 1
#define REPLACEMENT 2

/* Escape categories stored through the typeptr argument of
   PyPcre_expand_escape().  Only CHAR, MEMORY_REFERENCE and STRING are
   produced by the code visible in this file. */
#define CHAR 0
#define MEMORY_REFERENCE 1
#define SYNTAX 2
#define NOT_SYNTAX 3
#define SET 4
#define WORD_BOUNDARY 5
#define NOT_WORD_BOUNDARY 6
#define BEGINNING_OF_BUFFER 7
#define END_OF_BUFFER 8
#define STRING 9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000044
45static PcreObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000046newPcreObject(PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000047{
48 PcreObject *self;
Guido van Rossumb18618d2000-05-03 23:44:39 +000049 self = PyObject_New(PcreObject, &Pcre_Type);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000050 if (self == NULL)
51 return NULL;
52 self->regex = NULL;
53 self->regex_extra = NULL;
54 return self;
55}
56
57/* Pcre methods */
58
59static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000060PyPcre_dealloc(PcreObject *self)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000061{
Andrew M. Kuchling0c7822e2000-02-18 18:30:01 +000062 if (self->regex) (pcre_free)(self->regex);
63 if (self->regex_extra) (pcre_free)(self->regex_extra);
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_Del(self);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000065}
66
67
68static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000069PyPcre_exec(PcreObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000070{
Guido van Rossum50700601997-12-08 17:15:20 +000071 char *string;
72 int stringlen, pos = 0, options=0, endpos = -1, i, count;
73 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +000074 PyObject *list;
75
Andrew M. Kuchling888aa262001-06-18 19:04:04 +000076 if (!PyArg_ParseTuple(args, "t#|iii:match", &string, &stringlen,
77 &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +000078 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +000079 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +000080 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +000081 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +000082 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +000083 /* If an error occurred during the match, and an exception was raised,
84 just return NULL and leave the exception alone. The most likely
85 problem to cause this would be running out of memory for
86 the failure stack. */
87 if (PyErr_Occurred())
88 {
89 return NULL;
90 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +000091 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
92 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +000093 {
Guido van Rossum58132c61997-12-17 00:24:13 +000094 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
95 PyErr_SetObject(ErrorObject, errval);
96 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +000097 return NULL;
98 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +000099
100 list=PyList_New(self->num_groups+1);
101 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000102 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000103 {
104 PyObject *v;
105 int start=offsets[i*2], end=offsets[i*2+1];
106 /* If the group wasn't affected by the match, return -1, -1 */
107 if (start<0 || count<=i)
108 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000109 v=Py_BuildValue("ii", start, end);
110 if (v==NULL) {Py_DECREF(list); return NULL;}
111 PyList_SetItem(list, i, v);
112 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000113 return list;
114}
115
116static PyMethodDef Pcre_methods[] = {
117 {"match", (PyCFunction)PyPcre_exec, 1},
118 {NULL, NULL} /* sentinel */
119};
120
121static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000122PyPcre_getattr(PcreObject *self, char *name)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000123{
124 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
125}
126
127
128staticforward PyTypeObject Pcre_Type = {
Fred Drake0d40ba42000-02-04 20:33:49 +0000129 PyObject_HEAD_INIT(NULL)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000130 0, /*ob_size*/
131 "Pcre", /*tp_name*/
132 sizeof(PcreObject), /*tp_basicsize*/
133 0, /*tp_itemsize*/
134 /* methods */
135 (destructor)PyPcre_dealloc, /*tp_dealloc*/
136 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000137 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000138 0, /*tp_setattr*/
139 0, /*tp_compare*/
140 0, /*tp_repr*/
141 0, /*tp_as_number*/
142 0, /*tp_as_sequence*/
143 0, /*tp_as_mapping*/
144 0, /*tp_hash*/
145};
146/* --------------------------------------------------------------------- */
147
148static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000149PyPcre_compile(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000150{
151 PcreObject *rv;
152 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000153 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000154 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000155
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000156 int options, erroroffset;
Guido van Rossum43713e52000-02-29 13:59:29 +0000157 if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000158 &PyDict_Type, &dictionary))
159 return NULL;
160 rv = newPcreObject(args);
161 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000162 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000163
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000164 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000165 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000166 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000167 {
Guido van Rossumc4428c52000-04-25 15:59:32 +0000168 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000169 if (!PyErr_Occurred())
170 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000171 PyObject *errval = Py_BuildValue("si", error, erroroffset);
172 PyErr_SetObject(ErrorObject, errval);
173 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000174 }
175 return NULL;
176 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000177 rv->regex_extra=pcre_study(rv->regex, 0, &error);
178 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000179 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000180 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000181 Py_DECREF(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000182 PyErr_SetObject(ErrorObject, errval);
183 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000184 return NULL;
185 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000186 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
187 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000188 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000189 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
190 PyErr_SetObject(ErrorObject, errval);
191 Py_XDECREF(errval);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000192 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000193 return NULL;
194 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000195 return (PyObject *)rv;
196}
197
198static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000199PyPcre_expand_escape(unsigned char *pattern, int pattern_len,
200 int *indexptr, int *typeptr)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000201{
Guido van Rossum50700601997-12-08 17:15:20 +0000202 unsigned char c;
203 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000204
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000205 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000206 {
207 PyErr_SetString(ErrorObject, "escape ends too soon");
208 return NULL;
209 }
210 c=pattern[index]; index++;
211 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000212
Guido van Rossum50700601997-12-08 17:15:20 +0000213 switch (c)
214 {
215 case('t'):
216 *indexptr=index;
217 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000218 case('n'):
219 *indexptr = index;
220 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000221 case('v'):
222 *indexptr = index;
223 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000224 case('r'):
225 *indexptr = index;
226 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000227 case('f'):
228 *indexptr = index;
229 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000230 case('a'):
231 *indexptr = index;
232 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000233 case('b'):
234 *indexptr=index;
235 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000236 case('\\'):
237 *indexptr=index;
238 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000239
Guido van Rossum50700601997-12-08 17:15:20 +0000240 case('x'):
241 {
242 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000243
Guido van Rossum50700601997-12-08 17:15:20 +0000244 x = 0; end = index;
245 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
246 {
247 ch = pattern[end];
248 x = x * 16 + pcre_lcc[ch] -
249 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
250 x &= 255;
251 end++;
252 }
253 if (end==index)
254 {
255 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
256 return NULL;
257 }
258 *indexptr = end;
259 return Py_BuildValue("c", (char)x);
260 }
Guido van Rossum50700601997-12-08 17:15:20 +0000261
262 case('E'): case('G'): case('L'): case('Q'):
263 case('U'): case('l'): case('u'):
264 {
265 char message[50];
266 sprintf(message, "\\%c is not allowed", c);
267 PyErr_SetString(ErrorObject, message);
268 return NULL;
269 }
270
271 case('g'):
272 {
273 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000274 int group_num = 0, is_number=0;
275
Guido van Rossum50700601997-12-08 17:15:20 +0000276 if (pattern_len<=index)
277 {
278 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
279 return NULL;
280 }
281 if (pattern[index]!='<')
282 {
283 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
284 return NULL;
285 }
286 index++;
287 end=index;
288 while (end<pattern_len && pattern[end]!='>')
289 end++;
290 if (end==pattern_len)
291 {
292 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
293 return NULL;
294 }
295
296 if (index==end) /* Zero-length name */
297 {
298 /* XXX should include the text of the reference */
299 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
300 return NULL;
301 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000302 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000303 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000304 is_number = 1;
305 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000306 }
307
308 for(i=index+1; i<end; i++)
309 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000310 if (is_number &&
311 !(pcre_ctypes[pattern[i]] & ctype_digit) )
312 {
313 /* XXX should include the text of the reference */
314 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
315 return NULL;
316 }
317 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000318 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
319 {
320 /* XXX should include the text of the reference */
321 PyErr_SetString(ErrorObject, "illegal symbolic reference");
322 return NULL;
323 }
324 }
325
326 *typeptr = MEMORY_REFERENCE;
327 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000328 /* If it's a number, return the integer value of the group */
329 if (is_number) return Py_BuildValue("i", group_num);
330 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000331 return Py_BuildValue("s#", pattern+index, end-index);
332 }
Guido van Rossum50700601997-12-08 17:15:20 +0000333
334 case('0'):
335 {
336 /* \0 always indicates an octal escape, so we consume up to 3
337 characters, as long as they're all octal digits */
338 int octval=0, i;
339 index--;
340 for(i=index;
341 i<=index+2 && i<pattern_len
342 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
343 i++)
344 {
345 octval = octval * 8 + pattern[i] - '0';
346 }
347 if (octval>255)
348 {
349 PyErr_SetString(ErrorObject, "octal value out of range");
350 return NULL;
351 }
352 *indexptr = i;
353 return Py_BuildValue("c", (unsigned char)octval);
354 }
Fred Drakedff3a372001-07-19 21:29:49 +0000355
Guido van Rossum50700601997-12-08 17:15:20 +0000356 case('1'): case('2'): case('3'): case('4'):
357 case('5'): case('6'): case('7'): case('8'):
358 case('9'):
359 {
360 /* Handle \?, where ? is from 1 through 9 */
361 int value=0;
362 index--;
363 /* If it's at least a two-digit reference, like \34, it might
364 either be a 3-digit octal escape (\123) or a 2-digit
365 decimal memory reference (\34) */
366
367 if ( (index+1) <pattern_len &&
368 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
369 {
370 if ( (index+2) <pattern_len &&
371 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
372 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
373 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
374 )
375 {
376 /* 3 octal digits */
377 value= 8*8*(pattern[index ]-'0') +
378 8*(pattern[index+1]-'0') +
379 (pattern[index+2]-'0');
380 if (value>255)
381 {
382 PyErr_SetString(ErrorObject, "octal value out of range");
383 return NULL;
384 }
385 *indexptr = index+3;
386 return Py_BuildValue("c", (unsigned char)value);
387 }
388 else
389 {
390 /* 2-digit form, so it's a memory reference */
391 value= 10*(pattern[index ]-'0') +
392 (pattern[index+1]-'0');
393 if (value<1 || EXTRACT_MAX<=value)
394 {
395 PyErr_SetString(ErrorObject, "memory reference out of range");
396 return NULL;
397 }
398 *typeptr = MEMORY_REFERENCE;
399 *indexptr = index+2;
400 return Py_BuildValue("i", value);
401 }
402 }
403 else
404 {
405 /* Single-digit form, like \2, so it's a memory reference */
406 *typeptr = MEMORY_REFERENCE;
407 *indexptr = index+1;
408 return Py_BuildValue("i", pattern[index]-'0');
409 }
410 }
Guido van Rossum50700601997-12-08 17:15:20 +0000411
412 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000413 /* It's some unknown escape like \s, so return a string containing
414 \s */
415 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000416 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000417 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000418 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000419}
420
421static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000422PyPcre_expand(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000423{
Guido van Rossum50700601997-12-08 17:15:20 +0000424 PyObject *results, *match_obj;
425 PyObject *repl_obj, *newstring;
426 unsigned char *repl;
427 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000428
Guido van Rossum43713e52000-02-29 13:59:29 +0000429 if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj))
Guido van Rossum50700601997-12-08 17:15:20 +0000430 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000431
Guido van Rossum50700601997-12-08 17:15:20 +0000432 repl=(unsigned char *)PyString_AsString(repl_obj);
433 size=PyString_Size(repl_obj);
434 results=PyList_New(0);
435 if (results==NULL) return NULL;
436 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000437 {
Guido van Rossum50700601997-12-08 17:15:20 +0000438 if (repl[i]=='\\')
439 {
440 PyObject *value;
441 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000442
Guido van Rossum50700601997-12-08 17:15:20 +0000443 if (start!=i)
444 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000445 int status;
446 PyObject *s = PyString_FromStringAndSize(
447 (char *)repl+start, i-start);
448 if (s == NULL) {
449 Py_DECREF(results);
450 return NULL;
451 }
452 status = PyList_Append(results, s);
453 Py_DECREF(s);
454 if (status < 0) {
455 Py_DECREF(results);
456 return NULL;
457 }
Guido van Rossum50700601997-12-08 17:15:20 +0000458 total_len += i-start;
459 }
460 i++;
461 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
462 if (value==NULL)
463 {
464 /* PyPcre_expand_escape triggered an exception of some sort,
465 so just return */
466 Py_DECREF(results);
467 return NULL;
468 }
469 switch (escape_type)
470 {
471 case (CHAR):
472 PyList_Append(results, value);
473 total_len += PyString_Size(value);
474 break;
475 case(MEMORY_REFERENCE):
476 {
477 PyObject *r, *tuple, *result;
478 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000479 if (r == NULL) {
480 Py_DECREF(results);
481 return NULL;
482 }
Guido van Rossum50700601997-12-08 17:15:20 +0000483 tuple=PyTuple_New(1);
484 Py_INCREF(value);
485 PyTuple_SetItem(tuple, 0, value);
486 result=PyEval_CallObject(r, tuple);
487 Py_DECREF(r); Py_DECREF(tuple);
488 if (result==NULL)
489 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000490 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000491 Py_DECREF(results);
492 Py_DECREF(value);
493 return NULL;
494 }
495 if (result==Py_None)
496 {
497 char message[50];
498 sprintf(message,
499 "group did not contribute to the match");
500 PyErr_SetString(ErrorObject,
501 message);
502 Py_DECREF(result);
503 Py_DECREF(value);
504 Py_DECREF(results);
505 return NULL;
506 }
507 /* typecheck that it's a string! */
508 if (!PyString_Check(result))
509 {
510 Py_DECREF(results);
511 Py_DECREF(result);
512 PyErr_SetString(ErrorObject,
513 "group() must return a string value for replacement");
514 return NULL;
515 }
516 PyList_Append(results, result);
517 total_len += PyString_Size(result);
518 Py_DECREF(result);
519 }
520 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000521 case(STRING):
522 {
523 PyList_Append(results, value);
524 total_len += PyString_Size(value);
525 break;
526 }
Guido van Rossum50700601997-12-08 17:15:20 +0000527 default:
528 Py_DECREF(results);
529 PyErr_SetString(ErrorObject,
530 "bad escape in replacement");
531 return NULL;
532 }
533 Py_DECREF(value);
534 start=i;
535 i--; /* Decrement now, because the 'for' loop will increment it */
536 }
537 } /* endif repl[i]!='\\' */
538
539 if (start!=i)
540 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000541 int status;
542 PyObject *s = PyString_FromStringAndSize((char *)repl+start,
543 i-start);
544 if (s == NULL) {
545 Py_DECREF(results);
546 return NULL;
547 }
548 status = PyList_Append(results, s);
549 Py_DECREF(s);
550 if (status < 0) {
551 Py_DECREF(results);
552 return NULL;
553 }
Guido van Rossum50700601997-12-08 17:15:20 +0000554 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000555 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000556
Guido van Rossum50700601997-12-08 17:15:20 +0000557 /* Whew! Now we've constructed a list containing various pieces of
558 strings that will make up our final result. So, iterate over
559 the list concatenating them. A new string measuring total_len
560 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000561
Guido van Rossum50700601997-12-08 17:15:20 +0000562 newstring=PyString_FromStringAndSize(NULL, total_len);
563 if (newstring==NULL)
564 {
565 Py_DECREF(results);
566 return NULL;
567 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000568
Guido van Rossum50700601997-12-08 17:15:20 +0000569 repl=(unsigned char *)PyString_AsString(newstring);
570 for (pos=i=0; i<PyList_Size(results); i++)
571 {
572 PyObject *item=PyList_GetItem(results, i);
573 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
574 pos += PyString_Size(item);
575 }
576 Py_DECREF(results);
577 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000578}
579
580
581/* List of functions defined in the module */
582
583static PyMethodDef pcre_methods[] = {
584 {"pcre_compile", PyPcre_compile, 1},
585 {"pcre_expand", PyPcre_expand, 1},
586 {NULL, NULL} /* sentinel */
587};
588
589
590/*
591 * Convenience routine to export an integer value.
592 * For simplicity, errors (which are unlikely anyway) are ignored.
593 */
594
595static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000596insint(PyObject *d, char *name, int value)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000597{
598 PyObject *v = PyInt_FromLong((long) value);
599 if (v == NULL) {
600 /* Don't bother reporting this error */
601 PyErr_Clear();
602 }
603 else {
604 PyDict_SetItemString(d, name, v);
605 Py_DECREF(v);
606 }
607}
608
609
610/* Initialization function for the module (*must* be called initpcre) */
611
Guido van Rossum3886bb61998-12-04 18:50:17 +0000612DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000613initpcre(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000614{
615 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000616
Fred Drake0d40ba42000-02-04 20:33:49 +0000617 Pcre_Type.ob_type = &PyType_Type;
618
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000619 /* Create the module and add the functions */
620 m = Py_InitModule("pcre", pcre_methods);
621
622 /* Add some symbolic constants to the module */
623 d = PyModule_GetDict(m);
Fred Drake589c35b2000-07-06 19:38:49 +0000624 ErrorObject = PyErr_NewException("pcre.error", NULL, NULL);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000625 PyDict_SetItemString(d, "error", ErrorObject);
626
627 /* Insert the flags */
628 insint(d, "IGNORECASE", PCRE_CASELESS);
629 insint(d, "ANCHORED", PCRE_ANCHORED);
630 insint(d, "MULTILINE", PCRE_MULTILINE);
631 insint(d, "DOTALL", PCRE_DOTALL);
632 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000633 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000634}
635