blob: 6fb2d070172790e02f2843dbce2b5eefe320a50a [file] [log] [blame]
Guido van Rossum51b3aa31997-10-06 14:43:11 +00001/***********************************************************
Guido van Rossum58132c61997-12-17 00:24:13 +00002Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
Guido van Rossum51b3aa31997-10-06 14:43:11 +00003The Netherlands.
4
5 All Rights Reserved
6
Guido van Rossumfd71b9e2000-06-30 23:50:40 +00007Copyright (c) 2000, BeOpen.com.
8Copyright (c) 1995-2000, Corporation for National Research Initiatives.
9Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
10All rights reserved.
Guido van Rossum51b3aa31997-10-06 14:43:11 +000011
Guido van Rossumfd71b9e2000-06-30 23:50:40 +000012See the file "Misc/COPYRIGHT" for information on usage and
13redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
Guido van Rossum51b3aa31997-10-06 14:43:11 +000014
15******************************************************************/
16
17/* Pcre objects */
18
19#include "Python.h"
20
Guido van Rossum58132c61997-12-17 00:24:13 +000021#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +000022#ifndef Py_eval_input
23/* For Python 1.4, graminit.h has to be explicitly included */
24#include "graminit.h"
25#define Py_eval_input eval_input
26#endif
27
28#ifndef FOR_PYTHON
29#define FOR_PYTHON
30#endif
31
32#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000033#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000034
35static PyObject *ErrorObject;
36
37typedef struct {
38 PyObject_HEAD
39 pcre *regex;
40 pcre_extra *regex_extra;
41 int num_groups;
42} PcreObject;
43
44staticforward PyTypeObject Pcre_Type;
45
/* True when v's type is exactly the Pcre type defined in this file. */
#define PcreObject_Check(v)	((v)->ob_type == &Pcre_Type)

/* Context codes.  NOTE(review): not referenced by the code in this
   file as far as visible here -- confirm before removing. */
#define NORMAL			0
#define CHARCLASS		1
#define REPLACEMENT		2

/* Escape classification codes.  PyPcre_expand_escape() stores CHAR,
   MEMORY_REFERENCE or STRING through its typeptr argument; the other
   values are defined but not used by the code in this file. */
#define CHAR			0
#define MEMORY_REFERENCE	1
#define SYNTAX			2
#define NOT_SYNTAX		3
#define SET			4
#define WORD_BOUNDARY		5
#define NOT_WORD_BOUNDARY	6
#define BEGINNING_OF_BUFFER	7
#define END_OF_BUFFER		8
#define STRING			9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000061
62static PcreObject *
63newPcreObject(arg)
64 PyObject *arg;
65{
66 PcreObject *self;
Guido van Rossumb18618d2000-05-03 23:44:39 +000067 self = PyObject_New(PcreObject, &Pcre_Type);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000068 if (self == NULL)
69 return NULL;
70 self->regex = NULL;
71 self->regex_extra = NULL;
72 return self;
73}
74
75/* Pcre methods */
76
77static void
78PyPcre_dealloc(self)
79 PcreObject *self;
80{
Andrew M. Kuchling0c7822e2000-02-18 18:30:01 +000081 if (self->regex) (pcre_free)(self->regex);
82 if (self->regex_extra) (pcre_free)(self->regex_extra);
Guido van Rossumb18618d2000-05-03 23:44:39 +000083 PyObject_Del(self);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000084}
85
86
87static PyObject *
88PyPcre_exec(self, args)
89 PcreObject *self;
90 PyObject *args;
91{
Guido van Rossum50700601997-12-08 17:15:20 +000092 char *string;
93 int stringlen, pos = 0, options=0, endpos = -1, i, count;
94 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +000095 PyObject *list;
96
Guido van Rossum43713e52000-02-29 13:59:29 +000097 if (!PyArg_ParseTuple(args, "t#|iiii:match", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +000098 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +000099 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000100 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +0000101 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000102 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +0000103 /* If an error occurred during the match, and an exception was raised,
104 just return NULL and leave the exception alone. The most likely
105 problem to cause this would be running out of memory for
106 the failure stack. */
107 if (PyErr_Occurred())
108 {
109 return NULL;
110 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000111 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
112 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000113 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000114 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
115 PyErr_SetObject(ErrorObject, errval);
116 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000117 return NULL;
118 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000119
120 list=PyList_New(self->num_groups+1);
121 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000122 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000123 {
124 PyObject *v;
125 int start=offsets[i*2], end=offsets[i*2+1];
126 /* If the group wasn't affected by the match, return -1, -1 */
127 if (start<0 || count<=i)
128 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000129 v=Py_BuildValue("ii", start, end);
130 if (v==NULL) {Py_DECREF(list); return NULL;}
131 PyList_SetItem(list, i, v);
132 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000133 return list;
134}
135
136static PyMethodDef Pcre_methods[] = {
137 {"match", (PyCFunction)PyPcre_exec, 1},
138 {NULL, NULL} /* sentinel */
139};
140
141static PyObject *
142PyPcre_getattr(self, name)
143 PcreObject *self;
144 char *name;
145{
146 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
147}
148
149
150staticforward PyTypeObject Pcre_Type = {
Fred Drake0d40ba42000-02-04 20:33:49 +0000151 PyObject_HEAD_INIT(NULL)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000152 0, /*ob_size*/
153 "Pcre", /*tp_name*/
154 sizeof(PcreObject), /*tp_basicsize*/
155 0, /*tp_itemsize*/
156 /* methods */
157 (destructor)PyPcre_dealloc, /*tp_dealloc*/
158 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000159 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000160 0, /*tp_setattr*/
161 0, /*tp_compare*/
162 0, /*tp_repr*/
163 0, /*tp_as_number*/
164 0, /*tp_as_sequence*/
165 0, /*tp_as_mapping*/
166 0, /*tp_hash*/
167};
168/* --------------------------------------------------------------------- */
169
170static PyObject *
171PyPcre_compile(self, args)
172 PyObject *self; /* Not used */
173 PyObject *args;
174{
175 PcreObject *rv;
176 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000177 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000178 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000179
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000180 int options, erroroffset;
Guido van Rossum43713e52000-02-29 13:59:29 +0000181 if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000182 &PyDict_Type, &dictionary))
183 return NULL;
184 rv = newPcreObject(args);
185 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000186 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000187
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000188 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000189 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000190 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000191 {
Guido van Rossumc4428c52000-04-25 15:59:32 +0000192 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000193 if (!PyErr_Occurred())
194 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000195 PyObject *errval = Py_BuildValue("si", error, erroroffset);
196 PyErr_SetObject(ErrorObject, errval);
197 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000198 }
199 return NULL;
200 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000201 rv->regex_extra=pcre_study(rv->regex, 0, &error);
202 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000203 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000204 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000205 Py_DECREF(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000206 PyErr_SetObject(ErrorObject, errval);
207 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000208 return NULL;
209 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000210 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
211 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000212 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000213 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
214 PyErr_SetObject(ErrorObject, errval);
215 Py_XDECREF(errval);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000216 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000217 return NULL;
218 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000219 return (PyObject *)rv;
220}
221
222static PyObject *
Guido van Rossumc3861071997-10-08 02:07:40 +0000223PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
Guido van Rossum50700601997-12-08 17:15:20 +0000224 unsigned char *pattern;
225 int pattern_len, *indexptr, *typeptr;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000226{
Guido van Rossum50700601997-12-08 17:15:20 +0000227 unsigned char c;
228 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000229
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000230 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000231 {
232 PyErr_SetString(ErrorObject, "escape ends too soon");
233 return NULL;
234 }
235 c=pattern[index]; index++;
236 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000237
Guido van Rossum50700601997-12-08 17:15:20 +0000238 switch (c)
239 {
240 case('t'):
241 *indexptr=index;
242 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000243 case('n'):
244 *indexptr = index;
245 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000246 case('v'):
247 *indexptr = index;
248 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000249 case('r'):
250 *indexptr = index;
251 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000252 case('f'):
253 *indexptr = index;
254 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000255 case('a'):
256 *indexptr = index;
257 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000258 case('b'):
259 *indexptr=index;
260 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000261 case('\\'):
262 *indexptr=index;
263 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000264
Guido van Rossum50700601997-12-08 17:15:20 +0000265 case('x'):
266 {
267 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000268
Guido van Rossum50700601997-12-08 17:15:20 +0000269 x = 0; end = index;
270 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
271 {
272 ch = pattern[end];
273 x = x * 16 + pcre_lcc[ch] -
274 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
275 x &= 255;
276 end++;
277 }
278 if (end==index)
279 {
280 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
281 return NULL;
282 }
283 *indexptr = end;
284 return Py_BuildValue("c", (char)x);
285 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000286 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000287
288 case('E'): case('G'): case('L'): case('Q'):
289 case('U'): case('l'): case('u'):
290 {
291 char message[50];
292 sprintf(message, "\\%c is not allowed", c);
293 PyErr_SetString(ErrorObject, message);
294 return NULL;
295 }
296
297 case('g'):
298 {
299 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000300 int group_num = 0, is_number=0;
301
Guido van Rossum50700601997-12-08 17:15:20 +0000302 if (pattern_len<=index)
303 {
304 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
305 return NULL;
306 }
307 if (pattern[index]!='<')
308 {
309 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
310 return NULL;
311 }
312 index++;
313 end=index;
314 while (end<pattern_len && pattern[end]!='>')
315 end++;
316 if (end==pattern_len)
317 {
318 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
319 return NULL;
320 }
321
322 if (index==end) /* Zero-length name */
323 {
324 /* XXX should include the text of the reference */
325 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
326 return NULL;
327 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000328 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000329 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000330 is_number = 1;
331 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000332 }
333
334 for(i=index+1; i<end; i++)
335 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000336 if (is_number &&
337 !(pcre_ctypes[pattern[i]] & ctype_digit) )
338 {
339 /* XXX should include the text of the reference */
340 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
341 return NULL;
342 }
343 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000344 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
345 {
346 /* XXX should include the text of the reference */
347 PyErr_SetString(ErrorObject, "illegal symbolic reference");
348 return NULL;
349 }
350 }
351
352 *typeptr = MEMORY_REFERENCE;
353 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000354 /* If it's a number, return the integer value of the group */
355 if (is_number) return Py_BuildValue("i", group_num);
356 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000357 return Py_BuildValue("s#", pattern+index, end-index);
358 }
359 break;
360
361 case('0'):
362 {
363 /* \0 always indicates an octal escape, so we consume up to 3
364 characters, as long as they're all octal digits */
365 int octval=0, i;
366 index--;
367 for(i=index;
368 i<=index+2 && i<pattern_len
369 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
370 i++)
371 {
372 octval = octval * 8 + pattern[i] - '0';
373 }
374 if (octval>255)
375 {
376 PyErr_SetString(ErrorObject, "octal value out of range");
377 return NULL;
378 }
379 *indexptr = i;
380 return Py_BuildValue("c", (unsigned char)octval);
381 }
382 break;
383 case('1'): case('2'): case('3'): case('4'):
384 case('5'): case('6'): case('7'): case('8'):
385 case('9'):
386 {
387 /* Handle \?, where ? is from 1 through 9 */
388 int value=0;
389 index--;
390 /* If it's at least a two-digit reference, like \34, it might
391 either be a 3-digit octal escape (\123) or a 2-digit
392 decimal memory reference (\34) */
393
394 if ( (index+1) <pattern_len &&
395 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
396 {
397 if ( (index+2) <pattern_len &&
398 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
399 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
400 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
401 )
402 {
403 /* 3 octal digits */
404 value= 8*8*(pattern[index ]-'0') +
405 8*(pattern[index+1]-'0') +
406 (pattern[index+2]-'0');
407 if (value>255)
408 {
409 PyErr_SetString(ErrorObject, "octal value out of range");
410 return NULL;
411 }
412 *indexptr = index+3;
413 return Py_BuildValue("c", (unsigned char)value);
414 }
415 else
416 {
417 /* 2-digit form, so it's a memory reference */
418 value= 10*(pattern[index ]-'0') +
419 (pattern[index+1]-'0');
420 if (value<1 || EXTRACT_MAX<=value)
421 {
422 PyErr_SetString(ErrorObject, "memory reference out of range");
423 return NULL;
424 }
425 *typeptr = MEMORY_REFERENCE;
426 *indexptr = index+2;
427 return Py_BuildValue("i", value);
428 }
429 }
430 else
431 {
432 /* Single-digit form, like \2, so it's a memory reference */
433 *typeptr = MEMORY_REFERENCE;
434 *indexptr = index+1;
435 return Py_BuildValue("i", pattern[index]-'0');
436 }
437 }
438 break;
439
440 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000441 /* It's some unknown escape like \s, so return a string containing
442 \s */
443 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000444 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000445 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000446 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000447}
448
449static PyObject *
450PyPcre_expand(self, args)
451 PyObject *self;
452 PyObject *args;
453{
Guido van Rossum50700601997-12-08 17:15:20 +0000454 PyObject *results, *match_obj;
455 PyObject *repl_obj, *newstring;
456 unsigned char *repl;
457 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000458
Guido van Rossum43713e52000-02-29 13:59:29 +0000459 if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj))
Guido van Rossum50700601997-12-08 17:15:20 +0000460 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000461
Guido van Rossum50700601997-12-08 17:15:20 +0000462 repl=(unsigned char *)PyString_AsString(repl_obj);
463 size=PyString_Size(repl_obj);
464 results=PyList_New(0);
465 if (results==NULL) return NULL;
466 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000467 {
Guido van Rossum50700601997-12-08 17:15:20 +0000468 if (repl[i]=='\\')
469 {
470 PyObject *value;
471 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000472
Guido van Rossum50700601997-12-08 17:15:20 +0000473 if (start!=i)
474 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000475 int status;
476 PyObject *s = PyString_FromStringAndSize(
477 (char *)repl+start, i-start);
478 if (s == NULL) {
479 Py_DECREF(results);
480 return NULL;
481 }
482 status = PyList_Append(results, s);
483 Py_DECREF(s);
484 if (status < 0) {
485 Py_DECREF(results);
486 return NULL;
487 }
Guido van Rossum50700601997-12-08 17:15:20 +0000488 total_len += i-start;
489 }
490 i++;
491 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
492 if (value==NULL)
493 {
494 /* PyPcre_expand_escape triggered an exception of some sort,
495 so just return */
496 Py_DECREF(results);
497 return NULL;
498 }
499 switch (escape_type)
500 {
501 case (CHAR):
502 PyList_Append(results, value);
503 total_len += PyString_Size(value);
504 break;
505 case(MEMORY_REFERENCE):
506 {
507 PyObject *r, *tuple, *result;
508 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000509 if (r == NULL) {
510 Py_DECREF(results);
511 return NULL;
512 }
Guido van Rossum50700601997-12-08 17:15:20 +0000513 tuple=PyTuple_New(1);
514 Py_INCREF(value);
515 PyTuple_SetItem(tuple, 0, value);
516 result=PyEval_CallObject(r, tuple);
517 Py_DECREF(r); Py_DECREF(tuple);
518 if (result==NULL)
519 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000520 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000521 Py_DECREF(results);
522 Py_DECREF(value);
523 return NULL;
524 }
525 if (result==Py_None)
526 {
527 char message[50];
528 sprintf(message,
529 "group did not contribute to the match");
530 PyErr_SetString(ErrorObject,
531 message);
532 Py_DECREF(result);
533 Py_DECREF(value);
534 Py_DECREF(results);
535 return NULL;
536 }
537 /* typecheck that it's a string! */
538 if (!PyString_Check(result))
539 {
540 Py_DECREF(results);
541 Py_DECREF(result);
542 PyErr_SetString(ErrorObject,
543 "group() must return a string value for replacement");
544 return NULL;
545 }
546 PyList_Append(results, result);
547 total_len += PyString_Size(result);
548 Py_DECREF(result);
549 }
550 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000551 case(STRING):
552 {
553 PyList_Append(results, value);
554 total_len += PyString_Size(value);
555 break;
556 }
Guido van Rossum50700601997-12-08 17:15:20 +0000557 default:
558 Py_DECREF(results);
559 PyErr_SetString(ErrorObject,
560 "bad escape in replacement");
561 return NULL;
562 }
563 Py_DECREF(value);
564 start=i;
565 i--; /* Decrement now, because the 'for' loop will increment it */
566 }
567 } /* endif repl[i]!='\\' */
568
569 if (start!=i)
570 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000571 int status;
572 PyObject *s = PyString_FromStringAndSize((char *)repl+start,
573 i-start);
574 if (s == NULL) {
575 Py_DECREF(results);
576 return NULL;
577 }
578 status = PyList_Append(results, s);
579 Py_DECREF(s);
580 if (status < 0) {
581 Py_DECREF(results);
582 return NULL;
583 }
Guido van Rossum50700601997-12-08 17:15:20 +0000584 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000585 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000586
Guido van Rossum50700601997-12-08 17:15:20 +0000587 /* Whew! Now we've constructed a list containing various pieces of
588 strings that will make up our final result. So, iterate over
589 the list concatenating them. A new string measuring total_len
590 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000591
Guido van Rossum50700601997-12-08 17:15:20 +0000592 newstring=PyString_FromStringAndSize(NULL, total_len);
593 if (newstring==NULL)
594 {
595 Py_DECREF(results);
596 return NULL;
597 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000598
Guido van Rossum50700601997-12-08 17:15:20 +0000599 repl=(unsigned char *)PyString_AsString(newstring);
600 for (pos=i=0; i<PyList_Size(results); i++)
601 {
602 PyObject *item=PyList_GetItem(results, i);
603 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
604 pos += PyString_Size(item);
605 }
606 Py_DECREF(results);
607 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000608}
609
610
611/* List of functions defined in the module */
612
613static PyMethodDef pcre_methods[] = {
614 {"pcre_compile", PyPcre_compile, 1},
615 {"pcre_expand", PyPcre_expand, 1},
616 {NULL, NULL} /* sentinel */
617};
618
619
620/*
621 * Convenience routine to export an integer value.
622 * For simplicity, errors (which are unlikely anyway) are ignored.
623 */
624
625static void
626insint(d, name, value)
627 PyObject * d;
628 char * name;
629 int value;
630{
631 PyObject *v = PyInt_FromLong((long) value);
632 if (v == NULL) {
633 /* Don't bother reporting this error */
634 PyErr_Clear();
635 }
636 else {
637 PyDict_SetItemString(d, name, v);
638 Py_DECREF(v);
639 }
640}
641
642
643/* Initialization function for the module (*must* be called initpcre) */
644
Guido van Rossum3886bb61998-12-04 18:50:17 +0000645DL_EXPORT(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000646initpcre()
647{
648 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000649
Fred Drake0d40ba42000-02-04 20:33:49 +0000650 Pcre_Type.ob_type = &PyType_Type;
651
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000652 /* Create the module and add the functions */
653 m = Py_InitModule("pcre", pcre_methods);
654
655 /* Add some symbolic constants to the module */
656 d = PyModule_GetDict(m);
Fred Drake589c35b2000-07-06 19:38:49 +0000657 ErrorObject = PyErr_NewException("pcre.error", NULL, NULL);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000658 PyDict_SetItemString(d, "error", ErrorObject);
659
660 /* Insert the flags */
661 insint(d, "IGNORECASE", PCRE_CASELESS);
662 insint(d, "ANCHORED", PCRE_ANCHORED);
663 insint(d, "MULTILINE", PCRE_MULTILINE);
664 insint(d, "DOTALL", PCRE_DOTALL);
665 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000666 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000667
668 /* Check for errors */
669 if (PyErr_Occurred())
670 Py_FatalError("can't initialize module pcre");
671}
672