blob: fe45bacd564500ccb4e42eca4ab8e308b9c6e0ce [file] [log] [blame]
Guido van Rossum51b3aa31997-10-06 14:43:11 +00001/* Pcre objects */
2
3#include "Python.h"
4
Guido van Rossum58132c61997-12-17 00:24:13 +00005#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +00006#ifndef Py_eval_input
7/* For Python 1.4, graminit.h has to be explicitly included */
8#include "graminit.h"
9#define Py_eval_input eval_input
10#endif
11
12#ifndef FOR_PYTHON
13#define FOR_PYTHON
14#endif
15
16#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000017#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000018
19static PyObject *ErrorObject;
20
21typedef struct {
22 PyObject_HEAD
23 pcre *regex;
24 pcre_extra *regex_extra;
25 int num_groups;
26} PcreObject;
27
28staticforward PyTypeObject Pcre_Type;
29
/* True when v's type is exactly Pcre_Type. */
#define PcreObject_Check(v)     ((v)->ob_type == &Pcre_Type)

/* Context codes. */
#define NORMAL                  0
#define CHARCLASS               1
#define REPLACEMENT             2

/* Escape classification codes.  PyPcre_expand_escape() reports CHAR,
   MEMORY_REFERENCE or STRING through its typeptr argument. */
#define CHAR                    0
#define MEMORY_REFERENCE        1
#define SYNTAX                  2
#define NOT_SYNTAX              3
#define SET                     4
#define WORD_BOUNDARY           5
#define NOT_WORD_BOUNDARY       6
#define BEGINNING_OF_BUFFER     7
#define END_OF_BUFFER           8
#define STRING                  9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000045
46static PcreObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000047newPcreObject(PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000048{
49 PcreObject *self;
Guido van Rossumb18618d2000-05-03 23:44:39 +000050 self = PyObject_New(PcreObject, &Pcre_Type);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000051 if (self == NULL)
52 return NULL;
53 self->regex = NULL;
54 self->regex_extra = NULL;
55 return self;
56}
57
/* Pcre methods */
59
60static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000061PyPcre_dealloc(PcreObject *self)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000062{
Andrew M. Kuchling0c7822e2000-02-18 18:30:01 +000063 if (self->regex) (pcre_free)(self->regex);
64 if (self->regex_extra) (pcre_free)(self->regex_extra);
Guido van Rossumb18618d2000-05-03 23:44:39 +000065 PyObject_Del(self);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000066}
67
68
69static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000070PyPcre_exec(PcreObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000071{
Guido van Rossum50700601997-12-08 17:15:20 +000072 char *string;
73 int stringlen, pos = 0, options=0, endpos = -1, i, count;
74 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +000075 PyObject *list;
76
Guido van Rossum43713e52000-02-29 13:59:29 +000077 if (!PyArg_ParseTuple(args, "t#|iiii:match", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +000078 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +000079 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +000080 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +000081 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +000082 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +000083 /* If an error occurred during the match, and an exception was raised,
84 just return NULL and leave the exception alone. The most likely
85 problem to cause this would be running out of memory for
86 the failure stack. */
87 if (PyErr_Occurred())
88 {
89 return NULL;
90 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +000091 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
92 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +000093 {
Guido van Rossum58132c61997-12-17 00:24:13 +000094 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
95 PyErr_SetObject(ErrorObject, errval);
96 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +000097 return NULL;
98 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +000099
100 list=PyList_New(self->num_groups+1);
101 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000102 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000103 {
104 PyObject *v;
105 int start=offsets[i*2], end=offsets[i*2+1];
106 /* If the group wasn't affected by the match, return -1, -1 */
107 if (start<0 || count<=i)
108 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000109 v=Py_BuildValue("ii", start, end);
110 if (v==NULL) {Py_DECREF(list); return NULL;}
111 PyList_SetItem(list, i, v);
112 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000113 return list;
114}
115
116static PyMethodDef Pcre_methods[] = {
117 {"match", (PyCFunction)PyPcre_exec, 1},
118 {NULL, NULL} /* sentinel */
119};
120
121static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000122PyPcre_getattr(PcreObject *self, char *name)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000123{
124 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
125}
126
127
128staticforward PyTypeObject Pcre_Type = {
Fred Drake0d40ba42000-02-04 20:33:49 +0000129 PyObject_HEAD_INIT(NULL)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000130 0, /*ob_size*/
131 "Pcre", /*tp_name*/
132 sizeof(PcreObject), /*tp_basicsize*/
133 0, /*tp_itemsize*/
134 /* methods */
135 (destructor)PyPcre_dealloc, /*tp_dealloc*/
136 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000137 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000138 0, /*tp_setattr*/
139 0, /*tp_compare*/
140 0, /*tp_repr*/
141 0, /*tp_as_number*/
142 0, /*tp_as_sequence*/
143 0, /*tp_as_mapping*/
144 0, /*tp_hash*/
145};
146/* --------------------------------------------------------------------- */
147
148static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000149PyPcre_compile(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000150{
151 PcreObject *rv;
152 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000153 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000154 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000155
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000156 int options, erroroffset;
Guido van Rossum43713e52000-02-29 13:59:29 +0000157 if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000158 &PyDict_Type, &dictionary))
159 return NULL;
160 rv = newPcreObject(args);
161 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000162 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000163
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000164 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000165 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000166 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000167 {
Guido van Rossumc4428c52000-04-25 15:59:32 +0000168 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000169 if (!PyErr_Occurred())
170 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000171 PyObject *errval = Py_BuildValue("si", error, erroroffset);
172 PyErr_SetObject(ErrorObject, errval);
173 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000174 }
175 return NULL;
176 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000177 rv->regex_extra=pcre_study(rv->regex, 0, &error);
178 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000179 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000180 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000181 Py_DECREF(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000182 PyErr_SetObject(ErrorObject, errval);
183 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000184 return NULL;
185 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000186 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
187 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000188 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000189 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
190 PyErr_SetObject(ErrorObject, errval);
191 Py_XDECREF(errval);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000192 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000193 return NULL;
194 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000195 return (PyObject *)rv;
196}
197
198static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000199PyPcre_expand_escape(unsigned char *pattern, int pattern_len,
200 int *indexptr, int *typeptr)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000201{
Guido van Rossum50700601997-12-08 17:15:20 +0000202 unsigned char c;
203 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000204
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000205 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000206 {
207 PyErr_SetString(ErrorObject, "escape ends too soon");
208 return NULL;
209 }
210 c=pattern[index]; index++;
211 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000212
Guido van Rossum50700601997-12-08 17:15:20 +0000213 switch (c)
214 {
215 case('t'):
216 *indexptr=index;
217 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000218 case('n'):
219 *indexptr = index;
220 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000221 case('v'):
222 *indexptr = index;
223 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000224 case('r'):
225 *indexptr = index;
226 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000227 case('f'):
228 *indexptr = index;
229 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000230 case('a'):
231 *indexptr = index;
232 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000233 case('b'):
234 *indexptr=index;
235 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000236 case('\\'):
237 *indexptr=index;
238 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000239
Guido van Rossum50700601997-12-08 17:15:20 +0000240 case('x'):
241 {
242 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000243
Guido van Rossum50700601997-12-08 17:15:20 +0000244 x = 0; end = index;
245 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
246 {
247 ch = pattern[end];
248 x = x * 16 + pcre_lcc[ch] -
249 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
250 x &= 255;
251 end++;
252 }
253 if (end==index)
254 {
255 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
256 return NULL;
257 }
258 *indexptr = end;
259 return Py_BuildValue("c", (char)x);
260 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000261 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000262
263 case('E'): case('G'): case('L'): case('Q'):
264 case('U'): case('l'): case('u'):
265 {
266 char message[50];
267 sprintf(message, "\\%c is not allowed", c);
268 PyErr_SetString(ErrorObject, message);
269 return NULL;
270 }
271
272 case('g'):
273 {
274 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000275 int group_num = 0, is_number=0;
276
Guido van Rossum50700601997-12-08 17:15:20 +0000277 if (pattern_len<=index)
278 {
279 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
280 return NULL;
281 }
282 if (pattern[index]!='<')
283 {
284 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
285 return NULL;
286 }
287 index++;
288 end=index;
289 while (end<pattern_len && pattern[end]!='>')
290 end++;
291 if (end==pattern_len)
292 {
293 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
294 return NULL;
295 }
296
297 if (index==end) /* Zero-length name */
298 {
299 /* XXX should include the text of the reference */
300 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
301 return NULL;
302 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000303 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000304 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000305 is_number = 1;
306 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000307 }
308
309 for(i=index+1; i<end; i++)
310 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000311 if (is_number &&
312 !(pcre_ctypes[pattern[i]] & ctype_digit) )
313 {
314 /* XXX should include the text of the reference */
315 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
316 return NULL;
317 }
318 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000319 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
320 {
321 /* XXX should include the text of the reference */
322 PyErr_SetString(ErrorObject, "illegal symbolic reference");
323 return NULL;
324 }
325 }
326
327 *typeptr = MEMORY_REFERENCE;
328 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000329 /* If it's a number, return the integer value of the group */
330 if (is_number) return Py_BuildValue("i", group_num);
331 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000332 return Py_BuildValue("s#", pattern+index, end-index);
333 }
334 break;
335
336 case('0'):
337 {
338 /* \0 always indicates an octal escape, so we consume up to 3
339 characters, as long as they're all octal digits */
340 int octval=0, i;
341 index--;
342 for(i=index;
343 i<=index+2 && i<pattern_len
344 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
345 i++)
346 {
347 octval = octval * 8 + pattern[i] - '0';
348 }
349 if (octval>255)
350 {
351 PyErr_SetString(ErrorObject, "octal value out of range");
352 return NULL;
353 }
354 *indexptr = i;
355 return Py_BuildValue("c", (unsigned char)octval);
356 }
357 break;
358 case('1'): case('2'): case('3'): case('4'):
359 case('5'): case('6'): case('7'): case('8'):
360 case('9'):
361 {
362 /* Handle \?, where ? is from 1 through 9 */
363 int value=0;
364 index--;
365 /* If it's at least a two-digit reference, like \34, it might
366 either be a 3-digit octal escape (\123) or a 2-digit
367 decimal memory reference (\34) */
368
369 if ( (index+1) <pattern_len &&
370 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
371 {
372 if ( (index+2) <pattern_len &&
373 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
374 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
375 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
376 )
377 {
378 /* 3 octal digits */
379 value= 8*8*(pattern[index ]-'0') +
380 8*(pattern[index+1]-'0') +
381 (pattern[index+2]-'0');
382 if (value>255)
383 {
384 PyErr_SetString(ErrorObject, "octal value out of range");
385 return NULL;
386 }
387 *indexptr = index+3;
388 return Py_BuildValue("c", (unsigned char)value);
389 }
390 else
391 {
392 /* 2-digit form, so it's a memory reference */
393 value= 10*(pattern[index ]-'0') +
394 (pattern[index+1]-'0');
395 if (value<1 || EXTRACT_MAX<=value)
396 {
397 PyErr_SetString(ErrorObject, "memory reference out of range");
398 return NULL;
399 }
400 *typeptr = MEMORY_REFERENCE;
401 *indexptr = index+2;
402 return Py_BuildValue("i", value);
403 }
404 }
405 else
406 {
407 /* Single-digit form, like \2, so it's a memory reference */
408 *typeptr = MEMORY_REFERENCE;
409 *indexptr = index+1;
410 return Py_BuildValue("i", pattern[index]-'0');
411 }
412 }
413 break;
414
415 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000416 /* It's some unknown escape like \s, so return a string containing
417 \s */
418 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000419 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000420 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000421 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000422}
423
424static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000425PyPcre_expand(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000426{
Guido van Rossum50700601997-12-08 17:15:20 +0000427 PyObject *results, *match_obj;
428 PyObject *repl_obj, *newstring;
429 unsigned char *repl;
430 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000431
Guido van Rossum43713e52000-02-29 13:59:29 +0000432 if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj))
Guido van Rossum50700601997-12-08 17:15:20 +0000433 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000434
Guido van Rossum50700601997-12-08 17:15:20 +0000435 repl=(unsigned char *)PyString_AsString(repl_obj);
436 size=PyString_Size(repl_obj);
437 results=PyList_New(0);
438 if (results==NULL) return NULL;
439 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000440 {
Guido van Rossum50700601997-12-08 17:15:20 +0000441 if (repl[i]=='\\')
442 {
443 PyObject *value;
444 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000445
Guido van Rossum50700601997-12-08 17:15:20 +0000446 if (start!=i)
447 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000448 int status;
449 PyObject *s = PyString_FromStringAndSize(
450 (char *)repl+start, i-start);
451 if (s == NULL) {
452 Py_DECREF(results);
453 return NULL;
454 }
455 status = PyList_Append(results, s);
456 Py_DECREF(s);
457 if (status < 0) {
458 Py_DECREF(results);
459 return NULL;
460 }
Guido van Rossum50700601997-12-08 17:15:20 +0000461 total_len += i-start;
462 }
463 i++;
464 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
465 if (value==NULL)
466 {
467 /* PyPcre_expand_escape triggered an exception of some sort,
468 so just return */
469 Py_DECREF(results);
470 return NULL;
471 }
472 switch (escape_type)
473 {
474 case (CHAR):
475 PyList_Append(results, value);
476 total_len += PyString_Size(value);
477 break;
478 case(MEMORY_REFERENCE):
479 {
480 PyObject *r, *tuple, *result;
481 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000482 if (r == NULL) {
483 Py_DECREF(results);
484 return NULL;
485 }
Guido van Rossum50700601997-12-08 17:15:20 +0000486 tuple=PyTuple_New(1);
487 Py_INCREF(value);
488 PyTuple_SetItem(tuple, 0, value);
489 result=PyEval_CallObject(r, tuple);
490 Py_DECREF(r); Py_DECREF(tuple);
491 if (result==NULL)
492 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000493 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000494 Py_DECREF(results);
495 Py_DECREF(value);
496 return NULL;
497 }
498 if (result==Py_None)
499 {
500 char message[50];
501 sprintf(message,
502 "group did not contribute to the match");
503 PyErr_SetString(ErrorObject,
504 message);
505 Py_DECREF(result);
506 Py_DECREF(value);
507 Py_DECREF(results);
508 return NULL;
509 }
510 /* typecheck that it's a string! */
511 if (!PyString_Check(result))
512 {
513 Py_DECREF(results);
514 Py_DECREF(result);
515 PyErr_SetString(ErrorObject,
516 "group() must return a string value for replacement");
517 return NULL;
518 }
519 PyList_Append(results, result);
520 total_len += PyString_Size(result);
521 Py_DECREF(result);
522 }
523 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000524 case(STRING):
525 {
526 PyList_Append(results, value);
527 total_len += PyString_Size(value);
528 break;
529 }
Guido van Rossum50700601997-12-08 17:15:20 +0000530 default:
531 Py_DECREF(results);
532 PyErr_SetString(ErrorObject,
533 "bad escape in replacement");
534 return NULL;
535 }
536 Py_DECREF(value);
537 start=i;
538 i--; /* Decrement now, because the 'for' loop will increment it */
539 }
540 } /* endif repl[i]!='\\' */
541
542 if (start!=i)
543 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000544 int status;
545 PyObject *s = PyString_FromStringAndSize((char *)repl+start,
546 i-start);
547 if (s == NULL) {
548 Py_DECREF(results);
549 return NULL;
550 }
551 status = PyList_Append(results, s);
552 Py_DECREF(s);
553 if (status < 0) {
554 Py_DECREF(results);
555 return NULL;
556 }
Guido van Rossum50700601997-12-08 17:15:20 +0000557 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000558 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000559
Guido van Rossum50700601997-12-08 17:15:20 +0000560 /* Whew! Now we've constructed a list containing various pieces of
561 strings that will make up our final result. So, iterate over
562 the list concatenating them. A new string measuring total_len
563 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000564
Guido van Rossum50700601997-12-08 17:15:20 +0000565 newstring=PyString_FromStringAndSize(NULL, total_len);
566 if (newstring==NULL)
567 {
568 Py_DECREF(results);
569 return NULL;
570 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000571
Guido van Rossum50700601997-12-08 17:15:20 +0000572 repl=(unsigned char *)PyString_AsString(newstring);
573 for (pos=i=0; i<PyList_Size(results); i++)
574 {
575 PyObject *item=PyList_GetItem(results, i);
576 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
577 pos += PyString_Size(item);
578 }
579 Py_DECREF(results);
580 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000581}
582
583
/* List of functions defined in the module */
585
586static PyMethodDef pcre_methods[] = {
587 {"pcre_compile", PyPcre_compile, 1},
588 {"pcre_expand", PyPcre_expand, 1},
589 {NULL, NULL} /* sentinel */
590};
591
592
/*
 * Convenience routine to export an integer value.
 * For simplicity, errors (which are unlikely anyway) are ignored.
 */
597
598static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000599insint(PyObject *d, char *name, int value)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000600{
601 PyObject *v = PyInt_FromLong((long) value);
602 if (v == NULL) {
603 /* Don't bother reporting this error */
604 PyErr_Clear();
605 }
606 else {
607 PyDict_SetItemString(d, name, v);
608 Py_DECREF(v);
609 }
610}
611
612
/* Initialization function for the module (*must* be called initpcre) */
614
Guido van Rossum3886bb61998-12-04 18:50:17 +0000615DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000616initpcre(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000617{
618 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000619
Fred Drake0d40ba42000-02-04 20:33:49 +0000620 Pcre_Type.ob_type = &PyType_Type;
621
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000622 /* Create the module and add the functions */
623 m = Py_InitModule("pcre", pcre_methods);
624
625 /* Add some symbolic constants to the module */
626 d = PyModule_GetDict(m);
Fred Drake589c35b2000-07-06 19:38:49 +0000627 ErrorObject = PyErr_NewException("pcre.error", NULL, NULL);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000628 PyDict_SetItemString(d, "error", ErrorObject);
629
630 /* Insert the flags */
631 insint(d, "IGNORECASE", PCRE_CASELESS);
632 insint(d, "ANCHORED", PCRE_ANCHORED);
633 insint(d, "MULTILINE", PCRE_MULTILINE);
634 insint(d, "DOTALL", PCRE_DOTALL);
635 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000636 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000637}
638