blob: 3043405b0846128a7347660e1b901b5a77995853 [file] [log] [blame]
/***********************************************************
Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Copyright (c) 2000, BeOpen.com.
Copyright (c) 1995-2000, Corporation for National Research Initiatives.
Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
All rights reserved.

See the file "Misc/COPYRIGHT" for information on usage and
redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.

******************************************************************/
16
17/* Pcre objects */
18
#include "Python.h"

#include <assert.h>
#include <limits.h>
#ifndef Py_eval_input
/* For Python 1.4, graminit.h has to be explicitly included */
#include "graminit.h"
#define Py_eval_input eval_input
#endif

#ifndef FOR_PYTHON
#define FOR_PYTHON
#endif

#include "pcre.h"
#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000034
/* Exception object used for every error this module raises itself.
   It is created in initpcre() under the name "pcre.error". */
static PyObject *ErrorObject;

/* Python object holding one compiled pattern. */
typedef struct {
	PyObject_HEAD
	pcre *regex;		/* set from pcre_compile(); released in PyPcre_dealloc() */
	pcre_extra *regex_extra;	/* set from pcre_study(); may stay NULL */
	int num_groups;		/* set from pcre_info() in PyPcre_compile() */
} PcreObject;

/* Forward declaration; the type object itself is defined further down. */
staticforward PyTypeObject Pcre_Type;
45
/* True when v's type pointer is exactly Pcre_Type. */
#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
/* NOTE(review): NORMAL, CHARCLASS and REPLACEMENT are not referenced by
   any code visible in this file -- confirm before removing. */
#define NORMAL 0
#define CHARCLASS 1
#define REPLACEMENT 2

/* Classification codes for an expanded escape.  PyPcre_expand_escape()
   stores CHAR, MEMORY_REFERENCE or STRING through its typeptr argument and
   PyPcre_expand() switches on them; the remaining codes are not used by
   the code visible in this file. */
#define CHAR 0
#define MEMORY_REFERENCE 1
#define SYNTAX 2
#define NOT_SYNTAX 3
#define SET 4
#define WORD_BOUNDARY 5
#define NOT_WORD_BOUNDARY 6
#define BEGINNING_OF_BUFFER 7
#define END_OF_BUFFER 8
#define STRING 9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000061
62static PcreObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000063newPcreObject(PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000064{
65 PcreObject *self;
Guido van Rossumb18618d2000-05-03 23:44:39 +000066 self = PyObject_New(PcreObject, &Pcre_Type);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000067 if (self == NULL)
68 return NULL;
69 self->regex = NULL;
70 self->regex_extra = NULL;
71 return self;
72}
73
74/* Pcre methods */
75
76static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000077PyPcre_dealloc(PcreObject *self)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000078{
Andrew M. Kuchling0c7822e2000-02-18 18:30:01 +000079 if (self->regex) (pcre_free)(self->regex);
80 if (self->regex_extra) (pcre_free)(self->regex_extra);
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_Del(self);
Guido van Rossum51b3aa31997-10-06 14:43:11 +000082}
83
84
85static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +000086PyPcre_exec(PcreObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +000087{
Guido van Rossum50700601997-12-08 17:15:20 +000088 char *string;
89 int stringlen, pos = 0, options=0, endpos = -1, i, count;
90 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +000091 PyObject *list;
92
Guido van Rossum43713e52000-02-29 13:59:29 +000093 if (!PyArg_ParseTuple(args, "t#|iiii:match", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +000094 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +000095 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +000096 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +000097 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +000098 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +000099 /* If an error occurred during the match, and an exception was raised,
100 just return NULL and leave the exception alone. The most likely
101 problem to cause this would be running out of memory for
102 the failure stack. */
103 if (PyErr_Occurred())
104 {
105 return NULL;
106 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000107 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
108 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000109 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000110 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
111 PyErr_SetObject(ErrorObject, errval);
112 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000113 return NULL;
114 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000115
116 list=PyList_New(self->num_groups+1);
117 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000118 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000119 {
120 PyObject *v;
121 int start=offsets[i*2], end=offsets[i*2+1];
122 /* If the group wasn't affected by the match, return -1, -1 */
123 if (start<0 || count<=i)
124 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000125 v=Py_BuildValue("ii", start, end);
126 if (v==NULL) {Py_DECREF(list); return NULL;}
127 PyList_SetItem(list, i, v);
128 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000129 return list;
130}
131
/* Method table for Pcre objects; looked up by PyPcre_getattr().
   The only method is "match", implemented by PyPcre_exec(). */
static PyMethodDef Pcre_methods[] = {
	{"match",	(PyCFunction)PyPcre_exec,	1},
	{NULL,		NULL}		/* sentinel */
};
136
137static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000138PyPcre_getattr(PcreObject *self, char *name)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000139{
140 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
141}
142
143
144staticforward PyTypeObject Pcre_Type = {
Fred Drake0d40ba42000-02-04 20:33:49 +0000145 PyObject_HEAD_INIT(NULL)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000146 0, /*ob_size*/
147 "Pcre", /*tp_name*/
148 sizeof(PcreObject), /*tp_basicsize*/
149 0, /*tp_itemsize*/
150 /* methods */
151 (destructor)PyPcre_dealloc, /*tp_dealloc*/
152 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000153 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000154 0, /*tp_setattr*/
155 0, /*tp_compare*/
156 0, /*tp_repr*/
157 0, /*tp_as_number*/
158 0, /*tp_as_sequence*/
159 0, /*tp_as_mapping*/
160 0, /*tp_hash*/
161};
162/* --------------------------------------------------------------------- */
163
164static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000165PyPcre_compile(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000166{
167 PcreObject *rv;
168 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000169 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000170 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000171
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000172 int options, erroroffset;
Guido van Rossum43713e52000-02-29 13:59:29 +0000173 if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000174 &PyDict_Type, &dictionary))
175 return NULL;
176 rv = newPcreObject(args);
177 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000178 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000179
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000180 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000181 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000182 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000183 {
Guido van Rossumc4428c52000-04-25 15:59:32 +0000184 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000185 if (!PyErr_Occurred())
186 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000187 PyObject *errval = Py_BuildValue("si", error, erroroffset);
188 PyErr_SetObject(ErrorObject, errval);
189 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000190 }
191 return NULL;
192 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000193 rv->regex_extra=pcre_study(rv->regex, 0, &error);
194 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000195 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000196 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000197 Py_DECREF(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000198 PyErr_SetObject(ErrorObject, errval);
199 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000200 return NULL;
201 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000202 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
203 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000204 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000205 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
206 PyErr_SetObject(ErrorObject, errval);
207 Py_XDECREF(errval);
Guido van Rossumc4428c52000-04-25 15:59:32 +0000208 Py_DECREF(rv);
Guido van Rossum50700601997-12-08 17:15:20 +0000209 return NULL;
210 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000211 return (PyObject *)rv;
212}
213
214static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000215PyPcre_expand_escape(unsigned char *pattern, int pattern_len,
216 int *indexptr, int *typeptr)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000217{
Guido van Rossum50700601997-12-08 17:15:20 +0000218 unsigned char c;
219 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000220
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000221 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000222 {
223 PyErr_SetString(ErrorObject, "escape ends too soon");
224 return NULL;
225 }
226 c=pattern[index]; index++;
227 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000228
Guido van Rossum50700601997-12-08 17:15:20 +0000229 switch (c)
230 {
231 case('t'):
232 *indexptr=index;
233 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000234 case('n'):
235 *indexptr = index;
236 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000237 case('v'):
238 *indexptr = index;
239 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000240 case('r'):
241 *indexptr = index;
242 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000243 case('f'):
244 *indexptr = index;
245 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000246 case('a'):
247 *indexptr = index;
248 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000249 case('b'):
250 *indexptr=index;
251 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000252 case('\\'):
253 *indexptr=index;
254 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000255
Guido van Rossum50700601997-12-08 17:15:20 +0000256 case('x'):
257 {
258 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000259
Guido van Rossum50700601997-12-08 17:15:20 +0000260 x = 0; end = index;
261 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
262 {
263 ch = pattern[end];
264 x = x * 16 + pcre_lcc[ch] -
265 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
266 x &= 255;
267 end++;
268 }
269 if (end==index)
270 {
271 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
272 return NULL;
273 }
274 *indexptr = end;
275 return Py_BuildValue("c", (char)x);
276 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000277 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000278
279 case('E'): case('G'): case('L'): case('Q'):
280 case('U'): case('l'): case('u'):
281 {
282 char message[50];
283 sprintf(message, "\\%c is not allowed", c);
284 PyErr_SetString(ErrorObject, message);
285 return NULL;
286 }
287
288 case('g'):
289 {
290 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000291 int group_num = 0, is_number=0;
292
Guido van Rossum50700601997-12-08 17:15:20 +0000293 if (pattern_len<=index)
294 {
295 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
296 return NULL;
297 }
298 if (pattern[index]!='<')
299 {
300 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
301 return NULL;
302 }
303 index++;
304 end=index;
305 while (end<pattern_len && pattern[end]!='>')
306 end++;
307 if (end==pattern_len)
308 {
309 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
310 return NULL;
311 }
312
313 if (index==end) /* Zero-length name */
314 {
315 /* XXX should include the text of the reference */
316 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
317 return NULL;
318 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000319 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000320 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000321 is_number = 1;
322 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000323 }
324
325 for(i=index+1; i<end; i++)
326 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000327 if (is_number &&
328 !(pcre_ctypes[pattern[i]] & ctype_digit) )
329 {
330 /* XXX should include the text of the reference */
331 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
332 return NULL;
333 }
334 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000335 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
336 {
337 /* XXX should include the text of the reference */
338 PyErr_SetString(ErrorObject, "illegal symbolic reference");
339 return NULL;
340 }
341 }
342
343 *typeptr = MEMORY_REFERENCE;
344 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000345 /* If it's a number, return the integer value of the group */
346 if (is_number) return Py_BuildValue("i", group_num);
347 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000348 return Py_BuildValue("s#", pattern+index, end-index);
349 }
350 break;
351
352 case('0'):
353 {
354 /* \0 always indicates an octal escape, so we consume up to 3
355 characters, as long as they're all octal digits */
356 int octval=0, i;
357 index--;
358 for(i=index;
359 i<=index+2 && i<pattern_len
360 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
361 i++)
362 {
363 octval = octval * 8 + pattern[i] - '0';
364 }
365 if (octval>255)
366 {
367 PyErr_SetString(ErrorObject, "octal value out of range");
368 return NULL;
369 }
370 *indexptr = i;
371 return Py_BuildValue("c", (unsigned char)octval);
372 }
373 break;
374 case('1'): case('2'): case('3'): case('4'):
375 case('5'): case('6'): case('7'): case('8'):
376 case('9'):
377 {
378 /* Handle \?, where ? is from 1 through 9 */
379 int value=0;
380 index--;
381 /* If it's at least a two-digit reference, like \34, it might
382 either be a 3-digit octal escape (\123) or a 2-digit
383 decimal memory reference (\34) */
384
385 if ( (index+1) <pattern_len &&
386 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
387 {
388 if ( (index+2) <pattern_len &&
389 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
390 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
391 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
392 )
393 {
394 /* 3 octal digits */
395 value= 8*8*(pattern[index ]-'0') +
396 8*(pattern[index+1]-'0') +
397 (pattern[index+2]-'0');
398 if (value>255)
399 {
400 PyErr_SetString(ErrorObject, "octal value out of range");
401 return NULL;
402 }
403 *indexptr = index+3;
404 return Py_BuildValue("c", (unsigned char)value);
405 }
406 else
407 {
408 /* 2-digit form, so it's a memory reference */
409 value= 10*(pattern[index ]-'0') +
410 (pattern[index+1]-'0');
411 if (value<1 || EXTRACT_MAX<=value)
412 {
413 PyErr_SetString(ErrorObject, "memory reference out of range");
414 return NULL;
415 }
416 *typeptr = MEMORY_REFERENCE;
417 *indexptr = index+2;
418 return Py_BuildValue("i", value);
419 }
420 }
421 else
422 {
423 /* Single-digit form, like \2, so it's a memory reference */
424 *typeptr = MEMORY_REFERENCE;
425 *indexptr = index+1;
426 return Py_BuildValue("i", pattern[index]-'0');
427 }
428 }
429 break;
430
431 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000432 /* It's some unknown escape like \s, so return a string containing
433 \s */
434 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000435 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000436 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000437 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000438}
439
440static PyObject *
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000441PyPcre_expand(PyObject *self, PyObject *args)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000442{
Guido van Rossum50700601997-12-08 17:15:20 +0000443 PyObject *results, *match_obj;
444 PyObject *repl_obj, *newstring;
445 unsigned char *repl;
446 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000447
Guido van Rossum43713e52000-02-29 13:59:29 +0000448 if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj))
Guido van Rossum50700601997-12-08 17:15:20 +0000449 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000450
Guido van Rossum50700601997-12-08 17:15:20 +0000451 repl=(unsigned char *)PyString_AsString(repl_obj);
452 size=PyString_Size(repl_obj);
453 results=PyList_New(0);
454 if (results==NULL) return NULL;
455 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000456 {
Guido van Rossum50700601997-12-08 17:15:20 +0000457 if (repl[i]=='\\')
458 {
459 PyObject *value;
460 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000461
Guido van Rossum50700601997-12-08 17:15:20 +0000462 if (start!=i)
463 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000464 int status;
465 PyObject *s = PyString_FromStringAndSize(
466 (char *)repl+start, i-start);
467 if (s == NULL) {
468 Py_DECREF(results);
469 return NULL;
470 }
471 status = PyList_Append(results, s);
472 Py_DECREF(s);
473 if (status < 0) {
474 Py_DECREF(results);
475 return NULL;
476 }
Guido van Rossum50700601997-12-08 17:15:20 +0000477 total_len += i-start;
478 }
479 i++;
480 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
481 if (value==NULL)
482 {
483 /* PyPcre_expand_escape triggered an exception of some sort,
484 so just return */
485 Py_DECREF(results);
486 return NULL;
487 }
488 switch (escape_type)
489 {
490 case (CHAR):
491 PyList_Append(results, value);
492 total_len += PyString_Size(value);
493 break;
494 case(MEMORY_REFERENCE):
495 {
496 PyObject *r, *tuple, *result;
497 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000498 if (r == NULL) {
499 Py_DECREF(results);
500 return NULL;
501 }
Guido van Rossum50700601997-12-08 17:15:20 +0000502 tuple=PyTuple_New(1);
503 Py_INCREF(value);
504 PyTuple_SetItem(tuple, 0, value);
505 result=PyEval_CallObject(r, tuple);
506 Py_DECREF(r); Py_DECREF(tuple);
507 if (result==NULL)
508 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000509 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000510 Py_DECREF(results);
511 Py_DECREF(value);
512 return NULL;
513 }
514 if (result==Py_None)
515 {
516 char message[50];
517 sprintf(message,
518 "group did not contribute to the match");
519 PyErr_SetString(ErrorObject,
520 message);
521 Py_DECREF(result);
522 Py_DECREF(value);
523 Py_DECREF(results);
524 return NULL;
525 }
526 /* typecheck that it's a string! */
527 if (!PyString_Check(result))
528 {
529 Py_DECREF(results);
530 Py_DECREF(result);
531 PyErr_SetString(ErrorObject,
532 "group() must return a string value for replacement");
533 return NULL;
534 }
535 PyList_Append(results, result);
536 total_len += PyString_Size(result);
537 Py_DECREF(result);
538 }
539 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000540 case(STRING):
541 {
542 PyList_Append(results, value);
543 total_len += PyString_Size(value);
544 break;
545 }
Guido van Rossum50700601997-12-08 17:15:20 +0000546 default:
547 Py_DECREF(results);
548 PyErr_SetString(ErrorObject,
549 "bad escape in replacement");
550 return NULL;
551 }
552 Py_DECREF(value);
553 start=i;
554 i--; /* Decrement now, because the 'for' loop will increment it */
555 }
556 } /* endif repl[i]!='\\' */
557
558 if (start!=i)
559 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000560 int status;
561 PyObject *s = PyString_FromStringAndSize((char *)repl+start,
562 i-start);
563 if (s == NULL) {
564 Py_DECREF(results);
565 return NULL;
566 }
567 status = PyList_Append(results, s);
568 Py_DECREF(s);
569 if (status < 0) {
570 Py_DECREF(results);
571 return NULL;
572 }
Guido van Rossum50700601997-12-08 17:15:20 +0000573 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000574 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000575
Guido van Rossum50700601997-12-08 17:15:20 +0000576 /* Whew! Now we've constructed a list containing various pieces of
577 strings that will make up our final result. So, iterate over
578 the list concatenating them. A new string measuring total_len
579 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000580
Guido van Rossum50700601997-12-08 17:15:20 +0000581 newstring=PyString_FromStringAndSize(NULL, total_len);
582 if (newstring==NULL)
583 {
584 Py_DECREF(results);
585 return NULL;
586 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000587
Guido van Rossum50700601997-12-08 17:15:20 +0000588 repl=(unsigned char *)PyString_AsString(newstring);
589 for (pos=i=0; i<PyList_Size(results); i++)
590 {
591 PyObject *item=PyList_GetItem(results, i);
592 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
593 pos += PyString_Size(item);
594 }
595 Py_DECREF(results);
596 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000597}
598
599
600/* List of functions defined in the module */
601
/* Module-level function table, registered by initpcre():
   "pcre_compile" -> PyPcre_compile, "pcre_expand" -> PyPcre_expand. */
static PyMethodDef pcre_methods[] = {
	{"pcre_compile",		PyPcre_compile,		1},
	{"pcre_expand",		PyPcre_expand,		1},
	{NULL,		NULL}		/* sentinel */
};
607
608
609/*
610 * Convenience routine to export an integer value.
611 * For simplicity, errors (which are unlikely anyway) are ignored.
612 */
613
614static void
Peter Schneider-Kamp39e0e5a2000-07-10 13:12:27 +0000615insint(PyObject *d, char *name, int value)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000616{
617 PyObject *v = PyInt_FromLong((long) value);
618 if (v == NULL) {
619 /* Don't bother reporting this error */
620 PyErr_Clear();
621 }
622 else {
623 PyDict_SetItemString(d, name, v);
624 Py_DECREF(v);
625 }
626}
627
628
629/* Initialization function for the module (*must* be called initpcre) */
630
Guido van Rossum3886bb61998-12-04 18:50:17 +0000631DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000632initpcre(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000633{
634 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000635
Fred Drake0d40ba42000-02-04 20:33:49 +0000636 Pcre_Type.ob_type = &PyType_Type;
637
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000638 /* Create the module and add the functions */
639 m = Py_InitModule("pcre", pcre_methods);
640
641 /* Add some symbolic constants to the module */
642 d = PyModule_GetDict(m);
Fred Drake589c35b2000-07-06 19:38:49 +0000643 ErrorObject = PyErr_NewException("pcre.error", NULL, NULL);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000644 PyDict_SetItemString(d, "error", ErrorObject);
645
646 /* Insert the flags */
647 insint(d, "IGNORECASE", PCRE_CASELESS);
648 insint(d, "ANCHORED", PCRE_ANCHORED);
649 insint(d, "MULTILINE", PCRE_MULTILINE);
650 insint(d, "DOTALL", PCRE_DOTALL);
651 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000652 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000653
654 /* Check for errors */
655 if (PyErr_Occurred())
656 Py_FatalError("can't initialize module pcre");
657}
658