blob: af5b25456449828ce57c971da9e95e1563b4ff8d [file] [log] [blame]
Guido van Rossum51b3aa31997-10-06 14:43:11 +00001/* Pcre objects */
2
3#include "Python.h"
4
Guido van Rossum58132c61997-12-17 00:24:13 +00005#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +00006#ifndef Py_eval_input
7/* For Python 1.4, graminit.h has to be explicitly included */
8#include "graminit.h"
9#define Py_eval_input eval_input
10#endif
11
12#ifndef FOR_PYTHON
13#define FOR_PYTHON
14#endif
15
16#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000017#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000018
19static PyObject *ErrorObject;
20
21typedef struct {
22 PyObject_HEAD
23 pcre *regex;
24 pcre_extra *regex_extra;
25 int num_groups;
26} PcreObject;
27
28staticforward PyTypeObject Pcre_Type;
29
30#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
31#define NORMAL 0
32#define CHARCLASS 1
33#define REPLACEMENT 2
34
35#define CHAR 0
36#define MEMORY_REFERENCE 1
37#define SYNTAX 2
38#define NOT_SYNTAX 3
39#define SET 4
40#define WORD_BOUNDARY 5
41#define NOT_WORD_BOUNDARY 6
42#define BEGINNING_OF_BUFFER 7
43#define END_OF_BUFFER 8
Guido van Rossum042ff9e1998-04-03 21:13:31 +000044#define STRING 9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000045
46static PcreObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000047newPcreObject(PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000048{
49 PcreObject *self;
Guido van Rossumb18618d2000-05-03 23:44:39 +000050 self = PyObject_New(PcreObject, &Pcre_Type);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000051 if (self == NULL)
52 return NULL;
53 self->regex = NULL;
54 self->regex_extra = NULL;
55 return self;
56}
57
58/* Pcre methods */
59
60static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000061PyPcre_dealloc(PcreObject *self)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000062{
Andrew M. Kuchling0c7822e2000-02-18 18:30:01 +000063 if (self->regex) (pcre_free)(self->regex);
64 if (self->regex_extra) (pcre_free)(self->regex_extra);
Guido van Rossumb18618d2000-05-03 23:44:39 +000065 PyObject_Del(self);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000066}
67
68
69static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000070PyPcre_exec(PcreObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000071{
Guido van Rossum50700601997-12-08 17:15:20 +000072 char *string;
73 int stringlen, pos = 0, options=0, endpos = -1, i, count;
74 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +000075 PyObject *list;
76
Andrew M. Kuchling888aa262001-06-18 19:04:04 +000077 if (!PyArg_ParseTuple(args, "t#|iii:match", &string, &stringlen,
78 &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +000079 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +000080 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +000081 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +000082 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +000083 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +000084 /* If an error occurred during the match, and an exception was raised,
85 just return NULL and leave the exception alone. The most likely
86 problem to cause this would be running out of memory for
87 the failure stack. */
88 if (PyErr_Occurred())
89 {
90 return NULL;
91 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +000092 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
93 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +000094 {
Guido van Rossum58132c61997-12-17 00:24:13 +000095 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
96 PyErr_SetObject(ErrorObject, errval);
97 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +000098 return NULL;
99 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000100
101 list=PyList_New(self->num_groups+1);
102 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000103 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000104 {
105 PyObject *v;
106 int start=offsets[i*2], end=offsets[i*2+1];
107 /* If the group wasn't affected by the match, return -1, -1 */
108 if (start<0 || count<=i)
109 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000110 v=Py_BuildValue("ii", start, end);
111 if (v==NULL) {Py_DECREF(list); return NULL;}
112 PyList_SetItem(list, i, v);
113 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000114 return list;
115}
116
117static PyMethodDef Pcre_methods[] = {
118 {"match", (PyCFunction)PyPcre_exec, 1},
119 {NULL, NULL} /* sentinel */
120};
121
122static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000123PyPcre_getattr(PcreObject *self, char *name)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000124{
125 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
126}
127
128
129staticforward PyTypeObject Pcre_Type = {
Fred Drake0d40ba42000-02-04 20:33:49 +0000130 PyObject_HEAD_INIT(NULL)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000131 0, /*ob_size*/
132 "Pcre", /*tp_name*/
133 sizeof(PcreObject), /*tp_basicsize*/
134 0, /*tp_itemsize*/
135 /* methods */
136 (destructor)PyPcre_dealloc, /*tp_dealloc*/
137 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000138 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000139 0, /*tp_setattr*/
140 0, /*tp_compare*/
141 0, /*tp_repr*/
142 0, /*tp_as_number*/
143 0, /*tp_as_sequence*/
144 0, /*tp_as_mapping*/
145 0, /*tp_hash*/
146};
147/* --------------------------------------------------------------------- */
148
149static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000150PyPcre_compile(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000151{
152 PcreObject *rv;
153 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000154 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000155 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000156
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000157 int options, erroroffset;
Guido van Rossum43713e52000-02-29 13:59:29 +0000158 if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000159 &PyDict_Type, &dictionary))
160 return NULL;
161 rv = newPcreObject(args);
162 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000163 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000164
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000165 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000166 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000167 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000168 {
Guido van Rossumc4428c52000-04-25 15:59:32 +0000169 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000170 if (!PyErr_Occurred())
171 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000172 PyObject *errval = Py_BuildValue("si", error, erroroffset);
173 PyErr_SetObject(ErrorObject, errval);
174 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000175 }
176 return NULL;
177 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000178 rv->regex_extra=pcre_study(rv->regex, 0, &error);
179 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000180 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000181 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000182 Py_DECREF(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000183 PyErr_SetObject(ErrorObject, errval);
184 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000185 return NULL;
186 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000187 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
188 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000189 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000190 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
191 PyErr_SetObject(ErrorObject, errval);
192 Py_XDECREF(errval);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000193 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000194 return NULL;
195 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000196 return (PyObject *)rv;
197}
198
199static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000200PyPcre_expand_escape(unsigned char *pattern, int pattern_len,
201 int *indexptr, int *typeptr)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000202{
Guido van Rossum50700601997-12-08 17:15:20 +0000203 unsigned char c;
204 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000205
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000206 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000207 {
208 PyErr_SetString(ErrorObject, "escape ends too soon");
209 return NULL;
210 }
211 c=pattern[index]; index++;
212 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000213
Guido van Rossum50700601997-12-08 17:15:20 +0000214 switch (c)
215 {
216 case('t'):
217 *indexptr=index;
218 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000219 case('n'):
220 *indexptr = index;
221 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000222 case('v'):
223 *indexptr = index;
224 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000225 case('r'):
226 *indexptr = index;
227 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000228 case('f'):
229 *indexptr = index;
230 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000231 case('a'):
232 *indexptr = index;
233 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000234 case('b'):
235 *indexptr=index;
236 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000237 case('\\'):
238 *indexptr=index;
239 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000240
Guido van Rossum50700601997-12-08 17:15:20 +0000241 case('x'):
242 {
243 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000244
Guido van Rossum50700601997-12-08 17:15:20 +0000245 x = 0; end = index;
246 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
247 {
248 ch = pattern[end];
249 x = x * 16 + pcre_lcc[ch] -
250 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
251 x &= 255;
252 end++;
253 }
254 if (end==index)
255 {
256 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
257 return NULL;
258 }
259 *indexptr = end;
260 return Py_BuildValue("c", (char)x);
261 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000262 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000263
264 case('E'): case('G'): case('L'): case('Q'):
265 case('U'): case('l'): case('u'):
266 {
267 char message[50];
268 sprintf(message, "\\%c is not allowed", c);
269 PyErr_SetString(ErrorObject, message);
270 return NULL;
271 }
272
273 case('g'):
274 {
275 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000276 int group_num = 0, is_number=0;
277
Guido van Rossum50700601997-12-08 17:15:20 +0000278 if (pattern_len<=index)
279 {
280 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
281 return NULL;
282 }
283 if (pattern[index]!='<')
284 {
285 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
286 return NULL;
287 }
288 index++;
289 end=index;
290 while (end<pattern_len && pattern[end]!='>')
291 end++;
292 if (end==pattern_len)
293 {
294 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
295 return NULL;
296 }
297
298 if (index==end) /* Zero-length name */
299 {
300 /* XXX should include the text of the reference */
301 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
302 return NULL;
303 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000304 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000305 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000306 is_number = 1;
307 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000308 }
309
310 for(i=index+1; i<end; i++)
311 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000312 if (is_number &&
313 !(pcre_ctypes[pattern[i]] & ctype_digit) )
314 {
315 /* XXX should include the text of the reference */
316 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
317 return NULL;
318 }
319 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000320 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
321 {
322 /* XXX should include the text of the reference */
323 PyErr_SetString(ErrorObject, "illegal symbolic reference");
324 return NULL;
325 }
326 }
327
328 *typeptr = MEMORY_REFERENCE;
329 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000330 /* If it's a number, return the integer value of the group */
331 if (is_number) return Py_BuildValue("i", group_num);
332 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000333 return Py_BuildValue("s#", pattern+index, end-index);
334 }
335 break;
336
337 case('0'):
338 {
339 /* \0 always indicates an octal escape, so we consume up to 3
340 characters, as long as they're all octal digits */
341 int octval=0, i;
342 index--;
343 for(i=index;
344 i<=index+2 && i<pattern_len
345 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
346 i++)
347 {
348 octval = octval * 8 + pattern[i] - '0';
349 }
350 if (octval>255)
351 {
352 PyErr_SetString(ErrorObject, "octal value out of range");
353 return NULL;
354 }
355 *indexptr = i;
356 return Py_BuildValue("c", (unsigned char)octval);
357 }
358 break;
359 case('1'): case('2'): case('3'): case('4'):
360 case('5'): case('6'): case('7'): case('8'):
361 case('9'):
362 {
363 /* Handle \?, where ? is from 1 through 9 */
364 int value=0;
365 index--;
366 /* If it's at least a two-digit reference, like \34, it might
367 either be a 3-digit octal escape (\123) or a 2-digit
368 decimal memory reference (\34) */
369
370 if ( (index+1) <pattern_len &&
371 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
372 {
373 if ( (index+2) <pattern_len &&
374 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
375 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
376 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
377 )
378 {
379 /* 3 octal digits */
380 value= 8*8*(pattern[index ]-'0') +
381 8*(pattern[index+1]-'0') +
382 (pattern[index+2]-'0');
383 if (value>255)
384 {
385 PyErr_SetString(ErrorObject, "octal value out of range");
386 return NULL;
387 }
388 *indexptr = index+3;
389 return Py_BuildValue("c", (unsigned char)value);
390 }
391 else
392 {
393 /* 2-digit form, so it's a memory reference */
394 value= 10*(pattern[index ]-'0') +
395 (pattern[index+1]-'0');
396 if (value<1 || EXTRACT_MAX<=value)
397 {
398 PyErr_SetString(ErrorObject, "memory reference out of range");
399 return NULL;
400 }
401 *typeptr = MEMORY_REFERENCE;
402 *indexptr = index+2;
403 return Py_BuildValue("i", value);
404 }
405 }
406 else
407 {
408 /* Single-digit form, like \2, so it's a memory reference */
409 *typeptr = MEMORY_REFERENCE;
410 *indexptr = index+1;
411 return Py_BuildValue("i", pattern[index]-'0');
412 }
413 }
414 break;
415
416 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000417 /* It's some unknown escape like \s, so return a string containing
418 \s */
419 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000420 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000421 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000422 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000423}
424
425static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000426PyPcre_expand(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000427{
Guido van Rossum50700601997-12-08 17:15:20 +0000428 PyObject *results, *match_obj;
429 PyObject *repl_obj, *newstring;
430 unsigned char *repl;
431 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000432
Guido van Rossum43713e52000-02-29 13:59:29 +0000433 if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj))
Guido van Rossum50700601997-12-08 17:15:20 +0000434 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000435
Guido van Rossum50700601997-12-08 17:15:20 +0000436 repl=(unsigned char *)PyString_AsString(repl_obj);
437 size=PyString_Size(repl_obj);
438 results=PyList_New(0);
439 if (results==NULL) return NULL;
440 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000441 {
Guido van Rossum50700601997-12-08 17:15:20 +0000442 if (repl[i]=='\\')
443 {
444 PyObject *value;
445 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000446
Guido van Rossum50700601997-12-08 17:15:20 +0000447 if (start!=i)
448 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000449 int status;
450 PyObject *s = PyString_FromStringAndSize(
451 (char *)repl+start, i-start);
452 if (s == NULL) {
453 Py_DECREF(results);
454 return NULL;
455 }
456 status = PyList_Append(results, s);
457 Py_DECREF(s);
458 if (status < 0) {
459 Py_DECREF(results);
460 return NULL;
461 }
Guido van Rossum50700601997-12-08 17:15:20 +0000462 total_len += i-start;
463 }
464 i++;
465 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
466 if (value==NULL)
467 {
468 /* PyPcre_expand_escape triggered an exception of some sort,
469 so just return */
470 Py_DECREF(results);
471 return NULL;
472 }
473 switch (escape_type)
474 {
475 case (CHAR):
476 PyList_Append(results, value);
477 total_len += PyString_Size(value);
478 break;
479 case(MEMORY_REFERENCE):
480 {
481 PyObject *r, *tuple, *result;
482 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000483 if (r == NULL) {
484 Py_DECREF(results);
485 return NULL;
486 }
Guido van Rossum50700601997-12-08 17:15:20 +0000487 tuple=PyTuple_New(1);
488 Py_INCREF(value);
489 PyTuple_SetItem(tuple, 0, value);
490 result=PyEval_CallObject(r, tuple);
491 Py_DECREF(r); Py_DECREF(tuple);
492 if (result==NULL)
493 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000494 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000495 Py_DECREF(results);
496 Py_DECREF(value);
497 return NULL;
498 }
499 if (result==Py_None)
500 {
501 char message[50];
502 sprintf(message,
503 "group did not contribute to the match");
504 PyErr_SetString(ErrorObject,
505 message);
506 Py_DECREF(result);
507 Py_DECREF(value);
508 Py_DECREF(results);
509 return NULL;
510 }
511 /* typecheck that it's a string! */
512 if (!PyString_Check(result))
513 {
514 Py_DECREF(results);
515 Py_DECREF(result);
516 PyErr_SetString(ErrorObject,
517 "group() must return a string value for replacement");
518 return NULL;
519 }
520 PyList_Append(results, result);
521 total_len += PyString_Size(result);
522 Py_DECREF(result);
523 }
524 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000525 case(STRING):
526 {
527 PyList_Append(results, value);
528 total_len += PyString_Size(value);
529 break;
530 }
Guido van Rossum50700601997-12-08 17:15:20 +0000531 default:
532 Py_DECREF(results);
533 PyErr_SetString(ErrorObject,
534 "bad escape in replacement");
535 return NULL;
536 }
537 Py_DECREF(value);
538 start=i;
539 i--; /* Decrement now, because the 'for' loop will increment it */
540 }
541 } /* endif repl[i]!='\\' */
542
543 if (start!=i)
544 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000545 int status;
546 PyObject *s = PyString_FromStringAndSize((char *)repl+start,
547 i-start);
548 if (s == NULL) {
549 Py_DECREF(results);
550 return NULL;
551 }
552 status = PyList_Append(results, s);
553 Py_DECREF(s);
554 if (status < 0) {
555 Py_DECREF(results);
556 return NULL;
557 }
Guido van Rossum50700601997-12-08 17:15:20 +0000558 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000559 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000560
Guido van Rossum50700601997-12-08 17:15:20 +0000561 /* Whew! Now we've constructed a list containing various pieces of
562 strings that will make up our final result. So, iterate over
563 the list concatenating them. A new string measuring total_len
564 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000565
Guido van Rossum50700601997-12-08 17:15:20 +0000566 newstring=PyString_FromStringAndSize(NULL, total_len);
567 if (newstring==NULL)
568 {
569 Py_DECREF(results);
570 return NULL;
571 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000572
Guido van Rossum50700601997-12-08 17:15:20 +0000573 repl=(unsigned char *)PyString_AsString(newstring);
574 for (pos=i=0; i<PyList_Size(results); i++)
575 {
576 PyObject *item=PyList_GetItem(results, i);
577 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
578 pos += PyString_Size(item);
579 }
580 Py_DECREF(results);
581 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000582}
583
584
585/* List of functions defined in the module */
586
587static PyMethodDef pcre_methods[] = {
588 {"pcre_compile", PyPcre_compile, 1},
589 {"pcre_expand", PyPcre_expand, 1},
590 {NULL, NULL} /* sentinel */
591};
592
593
594/*
595 * Convenience routine to export an integer value.
596 * For simplicity, errors (which are unlikely anyway) are ignored.
597 */
598
599static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000600insint(PyObject *d, char *name, int value)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000601{
602 PyObject *v = PyInt_FromLong((long) value);
603 if (v == NULL) {
604 /* Don't bother reporting this error */
605 PyErr_Clear();
606 }
607 else {
608 PyDict_SetItemString(d, name, v);
609 Py_DECREF(v);
610 }
611}
612
613
614/* Initialization function for the module (*must* be called initpcre) */
615
Guido van Rossum3886bb61998-12-04 18:50:17 +0000616DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000617initpcre(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000618{
619 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000620
Fred Drake0d40ba42000-02-04 20:33:49 +0000621 Pcre_Type.ob_type = &PyType_Type;
622
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000623 /* Create the module and add the functions */
624 m = Py_InitModule("pcre", pcre_methods);
625
626 /* Add some symbolic constants to the module */
627 d = PyModule_GetDict(m);
Fred Drake589c35b2000-07-06 19:38:49 +0000628 ErrorObject = PyErr_NewException("pcre.error", NULL, NULL);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000629 PyDict_SetItemString(d, "error", ErrorObject);
630
631 /* Insert the flags */
632 insint(d, "IGNORECASE", PCRE_CASELESS);
633 insint(d, "ANCHORED", PCRE_ANCHORED);
634 insint(d, "MULTILINE", PCRE_MULTILINE);
635 insint(d, "DOTALL", PCRE_DOTALL);
636 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000637 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000638}
639