/***********************************************************
Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
32/* Pcre objects */
33
34#include "Python.h"
35
Guido van Rossum58132c61997-12-17 00:24:13 +000036#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +000037#ifndef Py_eval_input
38/* For Python 1.4, graminit.h has to be explicitly included */
39#include "graminit.h"
40#define Py_eval_input eval_input
41#endif
42
43#ifndef FOR_PYTHON
44#define FOR_PYTHON
45#endif
46
47#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000048#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000049
50static PyObject *ErrorObject;
51
52typedef struct {
53 PyObject_HEAD
54 pcre *regex;
55 pcre_extra *regex_extra;
56 int num_groups;
57} PcreObject;
58
59staticforward PyTypeObject Pcre_Type;
60
61#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
62#define NORMAL 0
63#define CHARCLASS 1
64#define REPLACEMENT 2
65
66#define CHAR 0
67#define MEMORY_REFERENCE 1
68#define SYNTAX 2
69#define NOT_SYNTAX 3
70#define SET 4
71#define WORD_BOUNDARY 5
72#define NOT_WORD_BOUNDARY 6
73#define BEGINNING_OF_BUFFER 7
74#define END_OF_BUFFER 8
Guido van Rossum042ff9e1998-04-03 21:13:31 +000075#define STRING 9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000076
77static PcreObject *
78newPcreObject(arg)
79 PyObject *arg;
80{
81 PcreObject *self;
82 self = PyObject_NEW(PcreObject, &Pcre_Type);
83 if (self == NULL)
84 return NULL;
85 self->regex = NULL;
86 self->regex_extra = NULL;
87 return self;
88}
89
90/* Pcre methods */
91
92static void
93PyPcre_dealloc(self)
94 PcreObject *self;
95{
Andrew M. Kuchling0c7822e2000-02-18 18:30:01 +000096 if (self->regex) (pcre_free)(self->regex);
97 if (self->regex_extra) (pcre_free)(self->regex_extra);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000098 PyMem_DEL(self);
99}
100
101
102static PyObject *
103PyPcre_exec(self, args)
104 PcreObject *self;
105 PyObject *args;
106{
Guido van Rossum50700601997-12-08 17:15:20 +0000107 char *string;
108 int stringlen, pos = 0, options=0, endpos = -1, i, count;
109 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000110 PyObject *list;
111
Guido van Rossum43713e52000-02-29 13:59:29 +0000112 if (!PyArg_ParseTuple(args, "t#|iiii:match", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000113 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +0000114 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000115 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +0000116 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000117 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +0000118 /* If an error occurred during the match, and an exception was raised,
119 just return NULL and leave the exception alone. The most likely
120 problem to cause this would be running out of memory for
121 the failure stack. */
122 if (PyErr_Occurred())
123 {
124 return NULL;
125 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000126 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
127 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000128 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000129 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
130 PyErr_SetObject(ErrorObject, errval);
131 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000132 return NULL;
133 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000134
135 list=PyList_New(self->num_groups+1);
136 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000137 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000138 {
139 PyObject *v;
140 int start=offsets[i*2], end=offsets[i*2+1];
141 /* If the group wasn't affected by the match, return -1, -1 */
142 if (start<0 || count<=i)
143 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000144 v=Py_BuildValue("ii", start, end);
145 if (v==NULL) {Py_DECREF(list); return NULL;}
146 PyList_SetItem(list, i, v);
147 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000148 return list;
149}
150
151static PyMethodDef Pcre_methods[] = {
152 {"match", (PyCFunction)PyPcre_exec, 1},
153 {NULL, NULL} /* sentinel */
154};
155
156static PyObject *
157PyPcre_getattr(self, name)
158 PcreObject *self;
159 char *name;
160{
161 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
162}
163
164
165staticforward PyTypeObject Pcre_Type = {
Fred Drake0d40ba42000-02-04 20:33:49 +0000166 PyObject_HEAD_INIT(NULL)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000167 0, /*ob_size*/
168 "Pcre", /*tp_name*/
169 sizeof(PcreObject), /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 /* methods */
172 (destructor)PyPcre_dealloc, /*tp_dealloc*/
173 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000174 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000175 0, /*tp_setattr*/
176 0, /*tp_compare*/
177 0, /*tp_repr*/
178 0, /*tp_as_number*/
179 0, /*tp_as_sequence*/
180 0, /*tp_as_mapping*/
181 0, /*tp_hash*/
182};
183/* --------------------------------------------------------------------- */
184
185static PyObject *
186PyPcre_compile(self, args)
187 PyObject *self; /* Not used */
188 PyObject *args;
189{
190 PcreObject *rv;
191 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000192 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000193 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000194
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000195 int options, erroroffset;
Guido van Rossum43713e52000-02-29 13:59:29 +0000196 if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000197 &PyDict_Type, &dictionary))
198 return NULL;
199 rv = newPcreObject(args);
200 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000201 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000202
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000203 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000204 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000205 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000206 {
Guido van Rossumc4428c52000-04-25 15:59:32 +0000207 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000208 if (!PyErr_Occurred())
209 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000210 PyObject *errval = Py_BuildValue("si", error, erroroffset);
211 PyErr_SetObject(ErrorObject, errval);
212 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000213 }
214 return NULL;
215 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000216 rv->regex_extra=pcre_study(rv->regex, 0, &error);
217 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000218 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000219 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000220 Py_DECREF(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000221 PyErr_SetObject(ErrorObject, errval);
222 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000223 return NULL;
224 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000225 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
226 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000227 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000228 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
229 PyErr_SetObject(ErrorObject, errval);
230 Py_XDECREF(errval);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000231 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000232 return NULL;
233 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000234 return (PyObject *)rv;
235}
236
237static PyObject *
Guido van Rossumc3861071997-10-08 02:07:40 +0000238PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
Guido van Rossum50700601997-12-08 17:15:20 +0000239 unsigned char *pattern;
240 int pattern_len, *indexptr, *typeptr;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000241{
Guido van Rossum50700601997-12-08 17:15:20 +0000242 unsigned char c;
243 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000244
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000245 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000246 {
247 PyErr_SetString(ErrorObject, "escape ends too soon");
248 return NULL;
249 }
250 c=pattern[index]; index++;
251 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000252
Guido van Rossum50700601997-12-08 17:15:20 +0000253 switch (c)
254 {
255 case('t'):
256 *indexptr=index;
257 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000258 case('n'):
259 *indexptr = index;
260 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000261 case('v'):
262 *indexptr = index;
263 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000264 case('r'):
265 *indexptr = index;
266 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000267 case('f'):
268 *indexptr = index;
269 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000270 case('a'):
271 *indexptr = index;
272 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000273 case('b'):
274 *indexptr=index;
275 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000276 case('\\'):
277 *indexptr=index;
278 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000279
Guido van Rossum50700601997-12-08 17:15:20 +0000280 case('x'):
281 {
282 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000283
Guido van Rossum50700601997-12-08 17:15:20 +0000284 x = 0; end = index;
285 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
286 {
287 ch = pattern[end];
288 x = x * 16 + pcre_lcc[ch] -
289 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
290 x &= 255;
291 end++;
292 }
293 if (end==index)
294 {
295 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
296 return NULL;
297 }
298 *indexptr = end;
299 return Py_BuildValue("c", (char)x);
300 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000301 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000302
303 case('E'): case('G'): case('L'): case('Q'):
304 case('U'): case('l'): case('u'):
305 {
306 char message[50];
307 sprintf(message, "\\%c is not allowed", c);
308 PyErr_SetString(ErrorObject, message);
309 return NULL;
310 }
311
312 case('g'):
313 {
314 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000315 int group_num = 0, is_number=0;
316
Guido van Rossum50700601997-12-08 17:15:20 +0000317 if (pattern_len<=index)
318 {
319 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
320 return NULL;
321 }
322 if (pattern[index]!='<')
323 {
324 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
325 return NULL;
326 }
327 index++;
328 end=index;
329 while (end<pattern_len && pattern[end]!='>')
330 end++;
331 if (end==pattern_len)
332 {
333 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
334 return NULL;
335 }
336
337 if (index==end) /* Zero-length name */
338 {
339 /* XXX should include the text of the reference */
340 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
341 return NULL;
342 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000343 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000344 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000345 is_number = 1;
346 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000347 }
348
349 for(i=index+1; i<end; i++)
350 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000351 if (is_number &&
352 !(pcre_ctypes[pattern[i]] & ctype_digit) )
353 {
354 /* XXX should include the text of the reference */
355 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
356 return NULL;
357 }
358 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000359 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
360 {
361 /* XXX should include the text of the reference */
362 PyErr_SetString(ErrorObject, "illegal symbolic reference");
363 return NULL;
364 }
365 }
366
367 *typeptr = MEMORY_REFERENCE;
368 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000369 /* If it's a number, return the integer value of the group */
370 if (is_number) return Py_BuildValue("i", group_num);
371 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000372 return Py_BuildValue("s#", pattern+index, end-index);
373 }
374 break;
375
376 case('0'):
377 {
378 /* \0 always indicates an octal escape, so we consume up to 3
379 characters, as long as they're all octal digits */
380 int octval=0, i;
381 index--;
382 for(i=index;
383 i<=index+2 && i<pattern_len
384 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
385 i++)
386 {
387 octval = octval * 8 + pattern[i] - '0';
388 }
389 if (octval>255)
390 {
391 PyErr_SetString(ErrorObject, "octal value out of range");
392 return NULL;
393 }
394 *indexptr = i;
395 return Py_BuildValue("c", (unsigned char)octval);
396 }
397 break;
398 case('1'): case('2'): case('3'): case('4'):
399 case('5'): case('6'): case('7'): case('8'):
400 case('9'):
401 {
402 /* Handle \?, where ? is from 1 through 9 */
403 int value=0;
404 index--;
405 /* If it's at least a two-digit reference, like \34, it might
406 either be a 3-digit octal escape (\123) or a 2-digit
407 decimal memory reference (\34) */
408
409 if ( (index+1) <pattern_len &&
410 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
411 {
412 if ( (index+2) <pattern_len &&
413 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
414 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
415 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
416 )
417 {
418 /* 3 octal digits */
419 value= 8*8*(pattern[index ]-'0') +
420 8*(pattern[index+1]-'0') +
421 (pattern[index+2]-'0');
422 if (value>255)
423 {
424 PyErr_SetString(ErrorObject, "octal value out of range");
425 return NULL;
426 }
427 *indexptr = index+3;
428 return Py_BuildValue("c", (unsigned char)value);
429 }
430 else
431 {
432 /* 2-digit form, so it's a memory reference */
433 value= 10*(pattern[index ]-'0') +
434 (pattern[index+1]-'0');
435 if (value<1 || EXTRACT_MAX<=value)
436 {
437 PyErr_SetString(ErrorObject, "memory reference out of range");
438 return NULL;
439 }
440 *typeptr = MEMORY_REFERENCE;
441 *indexptr = index+2;
442 return Py_BuildValue("i", value);
443 }
444 }
445 else
446 {
447 /* Single-digit form, like \2, so it's a memory reference */
448 *typeptr = MEMORY_REFERENCE;
449 *indexptr = index+1;
450 return Py_BuildValue("i", pattern[index]-'0');
451 }
452 }
453 break;
454
455 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000456 /* It's some unknown escape like \s, so return a string containing
457 \s */
458 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000459 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000460 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000461 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000462}
463
464static PyObject *
465PyPcre_expand(self, args)
466 PyObject *self;
467 PyObject *args;
468{
Guido van Rossum50700601997-12-08 17:15:20 +0000469 PyObject *results, *match_obj;
470 PyObject *repl_obj, *newstring;
471 unsigned char *repl;
472 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000473
Guido van Rossum43713e52000-02-29 13:59:29 +0000474 if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj))
Guido van Rossum50700601997-12-08 17:15:20 +0000475 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000476
Guido van Rossum50700601997-12-08 17:15:20 +0000477 repl=(unsigned char *)PyString_AsString(repl_obj);
478 size=PyString_Size(repl_obj);
479 results=PyList_New(0);
480 if (results==NULL) return NULL;
481 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000482 {
Guido van Rossum50700601997-12-08 17:15:20 +0000483 if (repl[i]=='\\')
484 {
485 PyObject *value;
486 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000487
Guido van Rossum50700601997-12-08 17:15:20 +0000488 if (start!=i)
489 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000490 int status;
491 PyObject *s = PyString_FromStringAndSize(
492 (char *)repl+start, i-start);
493 if (s == NULL) {
494 Py_DECREF(results);
495 return NULL;
496 }
497 status = PyList_Append(results, s);
498 Py_DECREF(s);
499 if (status < 0) {
500 Py_DECREF(results);
501 return NULL;
502 }
Guido van Rossum50700601997-12-08 17:15:20 +0000503 total_len += i-start;
504 }
505 i++;
506 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
507 if (value==NULL)
508 {
509 /* PyPcre_expand_escape triggered an exception of some sort,
510 so just return */
511 Py_DECREF(results);
512 return NULL;
513 }
514 switch (escape_type)
515 {
516 case (CHAR):
517 PyList_Append(results, value);
518 total_len += PyString_Size(value);
519 break;
520 case(MEMORY_REFERENCE):
521 {
522 PyObject *r, *tuple, *result;
523 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000524 if (r == NULL) {
525 Py_DECREF(results);
526 return NULL;
527 }
Guido van Rossum50700601997-12-08 17:15:20 +0000528 tuple=PyTuple_New(1);
529 Py_INCREF(value);
530 PyTuple_SetItem(tuple, 0, value);
531 result=PyEval_CallObject(r, tuple);
532 Py_DECREF(r); Py_DECREF(tuple);
533 if (result==NULL)
534 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000535 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000536 Py_DECREF(results);
537 Py_DECREF(value);
538 return NULL;
539 }
540 if (result==Py_None)
541 {
542 char message[50];
543 sprintf(message,
544 "group did not contribute to the match");
545 PyErr_SetString(ErrorObject,
546 message);
547 Py_DECREF(result);
548 Py_DECREF(value);
549 Py_DECREF(results);
550 return NULL;
551 }
552 /* typecheck that it's a string! */
553 if (!PyString_Check(result))
554 {
555 Py_DECREF(results);
556 Py_DECREF(result);
557 PyErr_SetString(ErrorObject,
558 "group() must return a string value for replacement");
559 return NULL;
560 }
561 PyList_Append(results, result);
562 total_len += PyString_Size(result);
563 Py_DECREF(result);
564 }
565 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000566 case(STRING):
567 {
568 PyList_Append(results, value);
569 total_len += PyString_Size(value);
570 break;
571 }
Guido van Rossum50700601997-12-08 17:15:20 +0000572 default:
573 Py_DECREF(results);
574 PyErr_SetString(ErrorObject,
575 "bad escape in replacement");
576 return NULL;
577 }
578 Py_DECREF(value);
579 start=i;
580 i--; /* Decrement now, because the 'for' loop will increment it */
581 }
582 } /* endif repl[i]!='\\' */
583
584 if (start!=i)
585 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000586 int status;
587 PyObject *s = PyString_FromStringAndSize((char *)repl+start,
588 i-start);
589 if (s == NULL) {
590 Py_DECREF(results);
591 return NULL;
592 }
593 status = PyList_Append(results, s);
594 Py_DECREF(s);
595 if (status < 0) {
596 Py_DECREF(results);
597 return NULL;
598 }
Guido van Rossum50700601997-12-08 17:15:20 +0000599 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000600 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000601
Guido van Rossum50700601997-12-08 17:15:20 +0000602 /* Whew! Now we've constructed a list containing various pieces of
603 strings that will make up our final result. So, iterate over
604 the list concatenating them. A new string measuring total_len
605 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000606
Guido van Rossum50700601997-12-08 17:15:20 +0000607 newstring=PyString_FromStringAndSize(NULL, total_len);
608 if (newstring==NULL)
609 {
610 Py_DECREF(results);
611 return NULL;
612 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000613
Guido van Rossum50700601997-12-08 17:15:20 +0000614 repl=(unsigned char *)PyString_AsString(newstring);
615 for (pos=i=0; i<PyList_Size(results); i++)
616 {
617 PyObject *item=PyList_GetItem(results, i);
618 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
619 pos += PyString_Size(item);
620 }
621 Py_DECREF(results);
622 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000623}
624
625
626/* List of functions defined in the module */
627
628static PyMethodDef pcre_methods[] = {
629 {"pcre_compile", PyPcre_compile, 1},
630 {"pcre_expand", PyPcre_expand, 1},
631 {NULL, NULL} /* sentinel */
632};
633
634
635/*
636 * Convenience routine to export an integer value.
637 * For simplicity, errors (which are unlikely anyway) are ignored.
638 */
639
640static void
641insint(d, name, value)
642 PyObject * d;
643 char * name;
644 int value;
645{
646 PyObject *v = PyInt_FromLong((long) value);
647 if (v == NULL) {
648 /* Don't bother reporting this error */
649 PyErr_Clear();
650 }
651 else {
652 PyDict_SetItemString(d, name, v);
653 Py_DECREF(v);
654 }
655}
656
657
658/* Initialization function for the module (*must* be called initpcre) */
659
Guido van Rossum3886bb61998-12-04 18:50:17 +0000660DL_EXPORT(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000661initpcre()
662{
663 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000664
Fred Drake0d40ba42000-02-04 20:33:49 +0000665 Pcre_Type.ob_type = &PyType_Type;
666
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000667 /* Create the module and add the functions */
668 m = Py_InitModule("pcre", pcre_methods);
669
670 /* Add some symbolic constants to the module */
671 d = PyModule_GetDict(m);
672 ErrorObject = PyString_FromString("pcre.error");
673 PyDict_SetItemString(d, "error", ErrorObject);
674
675 /* Insert the flags */
676 insint(d, "IGNORECASE", PCRE_CASELESS);
677 insint(d, "ANCHORED", PCRE_ANCHORED);
678 insint(d, "MULTILINE", PCRE_MULTILINE);
679 insint(d, "DOTALL", PCRE_DOTALL);
680 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000681 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000682
683 /* Check for errors */
684 if (PyErr_Occurred())
685 Py_FatalError("can't initialize module pcre");
686}
687