/***********************************************************
Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/

32/* Pcre objects */
33
34#include "Python.h"
35
Guido van Rossum58132c61997-12-17 00:24:13 +000036#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +000037#ifndef Py_eval_input
38/* For Python 1.4, graminit.h has to be explicitly included */
39#include "graminit.h"
40#define Py_eval_input eval_input
41#endif
42
43#ifndef FOR_PYTHON
44#define FOR_PYTHON
45#endif
46
47#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000048#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000049
50static PyObject *ErrorObject;
51
52typedef struct {
53 PyObject_HEAD
54 pcre *regex;
55 pcre_extra *regex_extra;
56 int num_groups;
57} PcreObject;
58
59staticforward PyTypeObject Pcre_Type;
60
61#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
62#define NORMAL 0
63#define CHARCLASS 1
64#define REPLACEMENT 2
65
66#define CHAR 0
67#define MEMORY_REFERENCE 1
68#define SYNTAX 2
69#define NOT_SYNTAX 3
70#define SET 4
71#define WORD_BOUNDARY 5
72#define NOT_WORD_BOUNDARY 6
73#define BEGINNING_OF_BUFFER 7
74#define END_OF_BUFFER 8
Guido van Rossum042ff9e1998-04-03 21:13:31 +000075#define STRING 9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000076
77static PcreObject *
78newPcreObject(arg)
79 PyObject *arg;
80{
81 PcreObject *self;
82 self = PyObject_NEW(PcreObject, &Pcre_Type);
83 if (self == NULL)
84 return NULL;
85 self->regex = NULL;
86 self->regex_extra = NULL;
87 return self;
88}
89
90/* Pcre methods */
91
92static void
93PyPcre_dealloc(self)
94 PcreObject *self;
95{
96 if (self->regex) free(self->regex);
97 if (self->regex_extra) free(self->regex_extra);
98 self->regex=NULL;
99 self->regex_extra=NULL;
100 PyMem_DEL(self);
101}
102
103
104static PyObject *
105PyPcre_exec(self, args)
106 PcreObject *self;
107 PyObject *args;
108{
Guido van Rossum50700601997-12-08 17:15:20 +0000109 char *string;
110 int stringlen, pos = 0, options=0, endpos = -1, i, count;
111 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000112 PyObject *list;
113
Guido van Rossum7e488981998-10-08 02:25:24 +0000114 if (!PyArg_ParseTuple(args, "t#|iiii", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000115 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +0000116 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000117 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +0000118 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000119 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +0000120 /* If an error occurred during the match, and an exception was raised,
121 just return NULL and leave the exception alone. The most likely
122 problem to cause this would be running out of memory for
123 the failure stack. */
124 if (PyErr_Occurred())
125 {
126 return NULL;
127 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000128 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
129 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000130 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000131 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
132 PyErr_SetObject(ErrorObject, errval);
133 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000134 return NULL;
135 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000136
137 list=PyList_New(self->num_groups+1);
138 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000139 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000140 {
141 PyObject *v;
142 int start=offsets[i*2], end=offsets[i*2+1];
143 /* If the group wasn't affected by the match, return -1, -1 */
144 if (start<0 || count<=i)
145 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000146 v=Py_BuildValue("ii", start, end);
147 if (v==NULL) {Py_DECREF(list); return NULL;}
148 PyList_SetItem(list, i, v);
149 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000150 return list;
151}
152
153static PyMethodDef Pcre_methods[] = {
154 {"match", (PyCFunction)PyPcre_exec, 1},
155 {NULL, NULL} /* sentinel */
156};
157
158static PyObject *
159PyPcre_getattr(self, name)
160 PcreObject *self;
161 char *name;
162{
163 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
164}
165
166
167staticforward PyTypeObject Pcre_Type = {
168 PyObject_HEAD_INIT(&PyType_Type)
169 0, /*ob_size*/
170 "Pcre", /*tp_name*/
171 sizeof(PcreObject), /*tp_basicsize*/
172 0, /*tp_itemsize*/
173 /* methods */
174 (destructor)PyPcre_dealloc, /*tp_dealloc*/
175 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000176 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000177 0, /*tp_setattr*/
178 0, /*tp_compare*/
179 0, /*tp_repr*/
180 0, /*tp_as_number*/
181 0, /*tp_as_sequence*/
182 0, /*tp_as_mapping*/
183 0, /*tp_hash*/
184};
185/* --------------------------------------------------------------------- */
186
187static PyObject *
188PyPcre_compile(self, args)
189 PyObject *self; /* Not used */
190 PyObject *args;
191{
192 PcreObject *rv;
193 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000194 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000195 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000196
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000197 int options, erroroffset;
198 if (!PyArg_ParseTuple(args, "siO!", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000199 &PyDict_Type, &dictionary))
200 return NULL;
201 rv = newPcreObject(args);
202 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000203 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000204
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000205 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000206 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000207 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000208 {
209 PyMem_DEL(rv);
210 if (!PyErr_Occurred())
211 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000212 PyObject *errval = Py_BuildValue("si", error, erroroffset);
213 PyErr_SetObject(ErrorObject, errval);
214 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000215 }
216 return NULL;
217 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000218 rv->regex_extra=pcre_study(rv->regex, 0, &error);
219 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000220 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000221 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossum50700601997-12-08 17:15:20 +0000222 PyMem_DEL(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000223 PyErr_SetObject(ErrorObject, errval);
224 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000225 return NULL;
226 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000227 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
228 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000229 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000230 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
231 PyErr_SetObject(ErrorObject, errval);
232 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000233 PyMem_DEL(rv);
234 return NULL;
235 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000236 return (PyObject *)rv;
237}
238
239static PyObject *
Guido van Rossumc3861071997-10-08 02:07:40 +0000240PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
Guido van Rossum50700601997-12-08 17:15:20 +0000241 unsigned char *pattern;
242 int pattern_len, *indexptr, *typeptr;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000243{
Guido van Rossum50700601997-12-08 17:15:20 +0000244 unsigned char c;
245 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000246
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000247 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000248 {
249 PyErr_SetString(ErrorObject, "escape ends too soon");
250 return NULL;
251 }
252 c=pattern[index]; index++;
253 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000254
Guido van Rossum50700601997-12-08 17:15:20 +0000255 switch (c)
256 {
257 case('t'):
258 *indexptr=index;
259 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000260 case('n'):
261 *indexptr = index;
262 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000263 case('v'):
264 *indexptr = index;
265 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000266 case('r'):
267 *indexptr = index;
268 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000269 case('f'):
270 *indexptr = index;
271 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000272 case('a'):
273 *indexptr = index;
274 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000275 case('b'):
276 *indexptr=index;
277 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000278 case('\\'):
279 *indexptr=index;
280 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000281
Guido van Rossum50700601997-12-08 17:15:20 +0000282 case('x'):
283 {
284 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000285
Guido van Rossum50700601997-12-08 17:15:20 +0000286 x = 0; end = index;
287 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
288 {
289 ch = pattern[end];
290 x = x * 16 + pcre_lcc[ch] -
291 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
292 x &= 255;
293 end++;
294 }
295 if (end==index)
296 {
297 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
298 return NULL;
299 }
300 *indexptr = end;
301 return Py_BuildValue("c", (char)x);
302 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000303 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000304
305 case('E'): case('G'): case('L'): case('Q'):
306 case('U'): case('l'): case('u'):
307 {
308 char message[50];
309 sprintf(message, "\\%c is not allowed", c);
310 PyErr_SetString(ErrorObject, message);
311 return NULL;
312 }
313
314 case('g'):
315 {
316 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000317 int group_num = 0, is_number=0;
318
Guido van Rossum50700601997-12-08 17:15:20 +0000319 if (pattern_len<=index)
320 {
321 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
322 return NULL;
323 }
324 if (pattern[index]!='<')
325 {
326 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
327 return NULL;
328 }
329 index++;
330 end=index;
331 while (end<pattern_len && pattern[end]!='>')
332 end++;
333 if (end==pattern_len)
334 {
335 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
336 return NULL;
337 }
338
339 if (index==end) /* Zero-length name */
340 {
341 /* XXX should include the text of the reference */
342 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
343 return NULL;
344 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000345 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000346 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000347 is_number = 1;
348 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000349 }
350
351 for(i=index+1; i<end; i++)
352 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000353 if (is_number &&
354 !(pcre_ctypes[pattern[i]] & ctype_digit) )
355 {
356 /* XXX should include the text of the reference */
357 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
358 return NULL;
359 }
360 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000361 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
362 {
363 /* XXX should include the text of the reference */
364 PyErr_SetString(ErrorObject, "illegal symbolic reference");
365 return NULL;
366 }
367 }
368
369 *typeptr = MEMORY_REFERENCE;
370 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000371 /* If it's a number, return the integer value of the group */
372 if (is_number) return Py_BuildValue("i", group_num);
373 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000374 return Py_BuildValue("s#", pattern+index, end-index);
375 }
376 break;
377
378 case('0'):
379 {
380 /* \0 always indicates an octal escape, so we consume up to 3
381 characters, as long as they're all octal digits */
382 int octval=0, i;
383 index--;
384 for(i=index;
385 i<=index+2 && i<pattern_len
386 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
387 i++)
388 {
389 octval = octval * 8 + pattern[i] - '0';
390 }
391 if (octval>255)
392 {
393 PyErr_SetString(ErrorObject, "octal value out of range");
394 return NULL;
395 }
396 *indexptr = i;
397 return Py_BuildValue("c", (unsigned char)octval);
398 }
399 break;
400 case('1'): case('2'): case('3'): case('4'):
401 case('5'): case('6'): case('7'): case('8'):
402 case('9'):
403 {
404 /* Handle \?, where ? is from 1 through 9 */
405 int value=0;
406 index--;
407 /* If it's at least a two-digit reference, like \34, it might
408 either be a 3-digit octal escape (\123) or a 2-digit
409 decimal memory reference (\34) */
410
411 if ( (index+1) <pattern_len &&
412 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
413 {
414 if ( (index+2) <pattern_len &&
415 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
416 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
417 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
418 )
419 {
420 /* 3 octal digits */
421 value= 8*8*(pattern[index ]-'0') +
422 8*(pattern[index+1]-'0') +
423 (pattern[index+2]-'0');
424 if (value>255)
425 {
426 PyErr_SetString(ErrorObject, "octal value out of range");
427 return NULL;
428 }
429 *indexptr = index+3;
430 return Py_BuildValue("c", (unsigned char)value);
431 }
432 else
433 {
434 /* 2-digit form, so it's a memory reference */
435 value= 10*(pattern[index ]-'0') +
436 (pattern[index+1]-'0');
437 if (value<1 || EXTRACT_MAX<=value)
438 {
439 PyErr_SetString(ErrorObject, "memory reference out of range");
440 return NULL;
441 }
442 *typeptr = MEMORY_REFERENCE;
443 *indexptr = index+2;
444 return Py_BuildValue("i", value);
445 }
446 }
447 else
448 {
449 /* Single-digit form, like \2, so it's a memory reference */
450 *typeptr = MEMORY_REFERENCE;
451 *indexptr = index+1;
452 return Py_BuildValue("i", pattern[index]-'0');
453 }
454 }
455 break;
456
457 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000458 /* It's some unknown escape like \s, so return a string containing
459 \s */
460 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000461 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000462 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000463 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000464}
465
466static PyObject *
467PyPcre_expand(self, args)
468 PyObject *self;
469 PyObject *args;
470{
Guido van Rossum50700601997-12-08 17:15:20 +0000471 PyObject *results, *match_obj;
472 PyObject *repl_obj, *newstring;
473 unsigned char *repl;
474 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000475
Guido van Rossum50700601997-12-08 17:15:20 +0000476 if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj))
477 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000478
Guido van Rossum50700601997-12-08 17:15:20 +0000479 repl=(unsigned char *)PyString_AsString(repl_obj);
480 size=PyString_Size(repl_obj);
481 results=PyList_New(0);
482 if (results==NULL) return NULL;
483 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000484 {
Guido van Rossum50700601997-12-08 17:15:20 +0000485 if (repl[i]=='\\')
486 {
487 PyObject *value;
488 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000489
Guido van Rossum50700601997-12-08 17:15:20 +0000490 if (start!=i)
491 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000492 int status;
493 PyObject *s = PyString_FromStringAndSize(
494 (char *)repl+start, i-start);
495 if (s == NULL) {
496 Py_DECREF(results);
497 return NULL;
498 }
499 status = PyList_Append(results, s);
500 Py_DECREF(s);
501 if (status < 0) {
502 Py_DECREF(results);
503 return NULL;
504 }
Guido van Rossum50700601997-12-08 17:15:20 +0000505 total_len += i-start;
506 }
507 i++;
508 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
509 if (value==NULL)
510 {
511 /* PyPcre_expand_escape triggered an exception of some sort,
512 so just return */
513 Py_DECREF(results);
514 return NULL;
515 }
516 switch (escape_type)
517 {
518 case (CHAR):
519 PyList_Append(results, value);
520 total_len += PyString_Size(value);
521 break;
522 case(MEMORY_REFERENCE):
523 {
524 PyObject *r, *tuple, *result;
525 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000526 if (r == NULL) {
527 Py_DECREF(results);
528 return NULL;
529 }
Guido van Rossum50700601997-12-08 17:15:20 +0000530 tuple=PyTuple_New(1);
531 Py_INCREF(value);
532 PyTuple_SetItem(tuple, 0, value);
533 result=PyEval_CallObject(r, tuple);
534 Py_DECREF(r); Py_DECREF(tuple);
535 if (result==NULL)
536 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000537 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000538 Py_DECREF(results);
539 Py_DECREF(value);
540 return NULL;
541 }
542 if (result==Py_None)
543 {
544 char message[50];
545 sprintf(message,
546 "group did not contribute to the match");
547 PyErr_SetString(ErrorObject,
548 message);
549 Py_DECREF(result);
550 Py_DECREF(value);
551 Py_DECREF(results);
552 return NULL;
553 }
554 /* typecheck that it's a string! */
555 if (!PyString_Check(result))
556 {
557 Py_DECREF(results);
558 Py_DECREF(result);
559 PyErr_SetString(ErrorObject,
560 "group() must return a string value for replacement");
561 return NULL;
562 }
563 PyList_Append(results, result);
564 total_len += PyString_Size(result);
565 Py_DECREF(result);
566 }
567 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000568 case(STRING):
569 {
570 PyList_Append(results, value);
571 total_len += PyString_Size(value);
572 break;
573 }
Guido van Rossum50700601997-12-08 17:15:20 +0000574 default:
575 Py_DECREF(results);
576 PyErr_SetString(ErrorObject,
577 "bad escape in replacement");
578 return NULL;
579 }
580 Py_DECREF(value);
581 start=i;
582 i--; /* Decrement now, because the 'for' loop will increment it */
583 }
584 } /* endif repl[i]!='\\' */
585
586 if (start!=i)
587 {
Barry Warsaw6aa42571999-02-01 17:09:00 +0000588 int status;
589 PyObject *s = PyString_FromStringAndSize((char *)repl+start,
590 i-start);
591 if (s == NULL) {
592 Py_DECREF(results);
593 return NULL;
594 }
595 status = PyList_Append(results, s);
596 Py_DECREF(s);
597 if (status < 0) {
598 Py_DECREF(results);
599 return NULL;
600 }
Guido van Rossum50700601997-12-08 17:15:20 +0000601 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000602 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000603
Guido van Rossum50700601997-12-08 17:15:20 +0000604 /* Whew! Now we've constructed a list containing various pieces of
605 strings that will make up our final result. So, iterate over
606 the list concatenating them. A new string measuring total_len
607 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000608
Guido van Rossum50700601997-12-08 17:15:20 +0000609 newstring=PyString_FromStringAndSize(NULL, total_len);
610 if (newstring==NULL)
611 {
612 Py_DECREF(results);
613 return NULL;
614 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000615
Guido van Rossum50700601997-12-08 17:15:20 +0000616 repl=(unsigned char *)PyString_AsString(newstring);
617 for (pos=i=0; i<PyList_Size(results); i++)
618 {
619 PyObject *item=PyList_GetItem(results, i);
620 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
621 pos += PyString_Size(item);
622 }
623 Py_DECREF(results);
624 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000625}
626
627
628/* List of functions defined in the module */
629
630static PyMethodDef pcre_methods[] = {
631 {"pcre_compile", PyPcre_compile, 1},
632 {"pcre_expand", PyPcre_expand, 1},
633 {NULL, NULL} /* sentinel */
634};
635
636
637/*
638 * Convenience routine to export an integer value.
639 * For simplicity, errors (which are unlikely anyway) are ignored.
640 */
641
642static void
643insint(d, name, value)
644 PyObject * d;
645 char * name;
646 int value;
647{
648 PyObject *v = PyInt_FromLong((long) value);
649 if (v == NULL) {
650 /* Don't bother reporting this error */
651 PyErr_Clear();
652 }
653 else {
654 PyDict_SetItemString(d, name, v);
655 Py_DECREF(v);
656 }
657}
658
659
660/* Initialization function for the module (*must* be called initpcre) */
661
Guido van Rossum3886bb61998-12-04 18:50:17 +0000662DL_EXPORT(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000663initpcre()
664{
665 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000666
667 /* Create the module and add the functions */
668 m = Py_InitModule("pcre", pcre_methods);
669
670 /* Add some symbolic constants to the module */
671 d = PyModule_GetDict(m);
672 ErrorObject = PyString_FromString("pcre.error");
673 PyDict_SetItemString(d, "error", ErrorObject);
674
675 /* Insert the flags */
676 insint(d, "IGNORECASE", PCRE_CASELESS);
677 insint(d, "ANCHORED", PCRE_ANCHORED);
678 insint(d, "MULTILINE", PCRE_MULTILINE);
679 insint(d, "DOTALL", PCRE_DOTALL);
680 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000681 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000682
683 /* Check for errors */
684 if (PyErr_Occurred())
685 Py_FatalError("can't initialize module pcre");
686}
687