blob: 40efd03cf3128cd5edbe5a5ebe1c4e3f2d0cddf3 [file] [log] [blame]
/***********************************************************
Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
32/* Pcre objects */
33
34#include "Python.h"
35
Guido van Rossum58132c61997-12-17 00:24:13 +000036#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +000037#ifndef Py_eval_input
38/* For Python 1.4, graminit.h has to be explicitly included */
39#include "graminit.h"
40#define Py_eval_input eval_input
41#endif
42
43#ifndef FOR_PYTHON
44#define FOR_PYTHON
45#endif
46
47#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000048#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000049
50static PyObject *ErrorObject;
51
52typedef struct {
53 PyObject_HEAD
54 pcre *regex;
55 pcre_extra *regex_extra;
56 int num_groups;
57} PcreObject;
58
59staticforward PyTypeObject Pcre_Type;
60
61#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
62#define NORMAL 0
63#define CHARCLASS 1
64#define REPLACEMENT 2
65
66#define CHAR 0
67#define MEMORY_REFERENCE 1
68#define SYNTAX 2
69#define NOT_SYNTAX 3
70#define SET 4
71#define WORD_BOUNDARY 5
72#define NOT_WORD_BOUNDARY 6
73#define BEGINNING_OF_BUFFER 7
74#define END_OF_BUFFER 8
Guido van Rossum042ff9e1998-04-03 21:13:31 +000075#define STRING 9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000076
77static PcreObject *
78newPcreObject(arg)
79 PyObject *arg;
80{
81 PcreObject *self;
82 self = PyObject_NEW(PcreObject, &Pcre_Type);
83 if (self == NULL)
84 return NULL;
85 self->regex = NULL;
86 self->regex_extra = NULL;
87 return self;
88}
89
90/* Pcre methods */
91
92static void
93PyPcre_dealloc(self)
94 PcreObject *self;
95{
96 if (self->regex) free(self->regex);
97 if (self->regex_extra) free(self->regex_extra);
98 self->regex=NULL;
99 self->regex_extra=NULL;
100 PyMem_DEL(self);
101}
102
103
104static PyObject *
105PyPcre_exec(self, args)
106 PcreObject *self;
107 PyObject *args;
108{
Guido van Rossum50700601997-12-08 17:15:20 +0000109 char *string;
110 int stringlen, pos = 0, options=0, endpos = -1, i, count;
111 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000112 PyObject *list;
113
Guido van Rossum50700601997-12-08 17:15:20 +0000114 if (!PyArg_ParseTuple(args, "s#|iiii", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000115 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +0000116 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000117 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum816671c1998-03-10 04:55:29 +0000118 (char *)string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000119 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +0000120 /* If an error occurred during the match, and an exception was raised,
121 just return NULL and leave the exception alone. The most likely
122 problem to cause this would be running out of memory for
123 the failure stack. */
124 if (PyErr_Occurred())
125 {
126 return NULL;
127 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000128 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
129 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000130 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000131 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
132 PyErr_SetObject(ErrorObject, errval);
133 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000134 return NULL;
135 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000136
137 list=PyList_New(self->num_groups+1);
138 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000139 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000140 {
141 PyObject *v;
142 int start=offsets[i*2], end=offsets[i*2+1];
143 /* If the group wasn't affected by the match, return -1, -1 */
144 if (start<0 || count<=i)
145 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000146 v=Py_BuildValue("ii", start, end);
147 if (v==NULL) {Py_DECREF(list); return NULL;}
148 PyList_SetItem(list, i, v);
149 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000150 return list;
151}
152
153static PyMethodDef Pcre_methods[] = {
154 {"match", (PyCFunction)PyPcre_exec, 1},
155 {NULL, NULL} /* sentinel */
156};
157
158static PyObject *
159PyPcre_getattr(self, name)
160 PcreObject *self;
161 char *name;
162{
163 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
164}
165
166
167staticforward PyTypeObject Pcre_Type = {
168 PyObject_HEAD_INIT(&PyType_Type)
169 0, /*ob_size*/
170 "Pcre", /*tp_name*/
171 sizeof(PcreObject), /*tp_basicsize*/
172 0, /*tp_itemsize*/
173 /* methods */
174 (destructor)PyPcre_dealloc, /*tp_dealloc*/
175 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000176 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000177 0, /*tp_setattr*/
178 0, /*tp_compare*/
179 0, /*tp_repr*/
180 0, /*tp_as_number*/
181 0, /*tp_as_sequence*/
182 0, /*tp_as_mapping*/
183 0, /*tp_hash*/
184};
185/* --------------------------------------------------------------------- */
186
187static PyObject *
188PyPcre_compile(self, args)
189 PyObject *self; /* Not used */
190 PyObject *args;
191{
192 PcreObject *rv;
193 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000194 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000195 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000196
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000197 int options, erroroffset;
198 if (!PyArg_ParseTuple(args, "siO!", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000199 &PyDict_Type, &dictionary))
200 return NULL;
201 rv = newPcreObject(args);
202 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000203 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000204
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000205 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000206 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000207 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000208 {
209 PyMem_DEL(rv);
210 if (!PyErr_Occurred())
211 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000212 PyObject *errval = Py_BuildValue("si", error, erroroffset);
213 PyErr_SetObject(ErrorObject, errval);
214 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000215 }
216 return NULL;
217 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000218 rv->regex_extra=pcre_study(rv->regex, 0, &error);
219 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000220 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000221 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossum50700601997-12-08 17:15:20 +0000222 PyMem_DEL(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000223 PyErr_SetObject(ErrorObject, errval);
224 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000225 return NULL;
226 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000227 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
228 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000229 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000230 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
231 PyErr_SetObject(ErrorObject, errval);
232 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000233 PyMem_DEL(rv);
234 return NULL;
235 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000236 return (PyObject *)rv;
237}
238
239static PyObject *
Guido van Rossumc3861071997-10-08 02:07:40 +0000240PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
Guido van Rossum50700601997-12-08 17:15:20 +0000241 unsigned char *pattern;
242 int pattern_len, *indexptr, *typeptr;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000243{
Guido van Rossum50700601997-12-08 17:15:20 +0000244 unsigned char c;
245 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000246
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000247 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000248 {
249 PyErr_SetString(ErrorObject, "escape ends too soon");
250 return NULL;
251 }
252 c=pattern[index]; index++;
253 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000254
Guido van Rossum50700601997-12-08 17:15:20 +0000255 switch (c)
256 {
257 case('t'):
258 *indexptr=index;
259 return Py_BuildValue("c", (char)9);
260 break;
261 case('n'):
262 *indexptr = index;
263 return Py_BuildValue("c", (char)10);
264 break;
265 case('v'):
266 *indexptr = index;
267 return Py_BuildValue("c", (char)11);
268 break;
269 case('r'):
270 *indexptr = index;
271 return Py_BuildValue("c", (char)13);
272 break;
273 case('f'):
274 *indexptr = index;
275 return Py_BuildValue("c", (char)12);
276 break;
277 case('a'):
278 *indexptr = index;
279 return Py_BuildValue("c", (char)7);
280 break;
281 case('b'):
282 *indexptr=index;
283 return Py_BuildValue("c", (char)8);
284 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000285 case('\\'):
286 *indexptr=index;
287 return Py_BuildValue("c", '\\');
288 break;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000289
Guido van Rossum50700601997-12-08 17:15:20 +0000290 case('x'):
291 {
292 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000293
Guido van Rossum50700601997-12-08 17:15:20 +0000294 x = 0; end = index;
295 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
296 {
297 ch = pattern[end];
298 x = x * 16 + pcre_lcc[ch] -
299 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
300 x &= 255;
301 end++;
302 }
303 if (end==index)
304 {
305 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
306 return NULL;
307 }
308 *indexptr = end;
309 return Py_BuildValue("c", (char)x);
310 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000311 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000312
313 case('E'): case('G'): case('L'): case('Q'):
314 case('U'): case('l'): case('u'):
315 {
316 char message[50];
317 sprintf(message, "\\%c is not allowed", c);
318 PyErr_SetString(ErrorObject, message);
319 return NULL;
320 }
321
322 case('g'):
323 {
324 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000325 int group_num = 0, is_number=0;
326
Guido van Rossum50700601997-12-08 17:15:20 +0000327 if (pattern_len<=index)
328 {
329 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
330 return NULL;
331 }
332 if (pattern[index]!='<')
333 {
334 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
335 return NULL;
336 }
337 index++;
338 end=index;
339 while (end<pattern_len && pattern[end]!='>')
340 end++;
341 if (end==pattern_len)
342 {
343 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
344 return NULL;
345 }
346
347 if (index==end) /* Zero-length name */
348 {
349 /* XXX should include the text of the reference */
350 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
351 return NULL;
352 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000353 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000354 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000355 is_number = 1;
356 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000357 }
358
359 for(i=index+1; i<end; i++)
360 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000361 if (is_number &&
362 !(pcre_ctypes[pattern[i]] & ctype_digit) )
363 {
364 /* XXX should include the text of the reference */
365 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
366 return NULL;
367 }
368 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000369 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
370 {
371 /* XXX should include the text of the reference */
372 PyErr_SetString(ErrorObject, "illegal symbolic reference");
373 return NULL;
374 }
375 }
376
377 *typeptr = MEMORY_REFERENCE;
378 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000379 /* If it's a number, return the integer value of the group */
380 if (is_number) return Py_BuildValue("i", group_num);
381 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000382 return Py_BuildValue("s#", pattern+index, end-index);
383 }
384 break;
385
386 case('0'):
387 {
388 /* \0 always indicates an octal escape, so we consume up to 3
389 characters, as long as they're all octal digits */
390 int octval=0, i;
391 index--;
392 for(i=index;
393 i<=index+2 && i<pattern_len
394 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
395 i++)
396 {
397 octval = octval * 8 + pattern[i] - '0';
398 }
399 if (octval>255)
400 {
401 PyErr_SetString(ErrorObject, "octal value out of range");
402 return NULL;
403 }
404 *indexptr = i;
405 return Py_BuildValue("c", (unsigned char)octval);
406 }
407 break;
408 case('1'): case('2'): case('3'): case('4'):
409 case('5'): case('6'): case('7'): case('8'):
410 case('9'):
411 {
412 /* Handle \?, where ? is from 1 through 9 */
413 int value=0;
414 index--;
415 /* If it's at least a two-digit reference, like \34, it might
416 either be a 3-digit octal escape (\123) or a 2-digit
417 decimal memory reference (\34) */
418
419 if ( (index+1) <pattern_len &&
420 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
421 {
422 if ( (index+2) <pattern_len &&
423 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
424 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
425 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
426 )
427 {
428 /* 3 octal digits */
429 value= 8*8*(pattern[index ]-'0') +
430 8*(pattern[index+1]-'0') +
431 (pattern[index+2]-'0');
432 if (value>255)
433 {
434 PyErr_SetString(ErrorObject, "octal value out of range");
435 return NULL;
436 }
437 *indexptr = index+3;
438 return Py_BuildValue("c", (unsigned char)value);
439 }
440 else
441 {
442 /* 2-digit form, so it's a memory reference */
443 value= 10*(pattern[index ]-'0') +
444 (pattern[index+1]-'0');
445 if (value<1 || EXTRACT_MAX<=value)
446 {
447 PyErr_SetString(ErrorObject, "memory reference out of range");
448 return NULL;
449 }
450 *typeptr = MEMORY_REFERENCE;
451 *indexptr = index+2;
452 return Py_BuildValue("i", value);
453 }
454 }
455 else
456 {
457 /* Single-digit form, like \2, so it's a memory reference */
458 *typeptr = MEMORY_REFERENCE;
459 *indexptr = index+1;
460 return Py_BuildValue("i", pattern[index]-'0');
461 }
462 }
463 break;
464
465 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000466 /* It's some unknown escape like \s, so return a string containing
467 \s */
468 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000469 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000470 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000471 break;
472 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000473}
474
475static PyObject *
476PyPcre_expand(self, args)
477 PyObject *self;
478 PyObject *args;
479{
Guido van Rossum50700601997-12-08 17:15:20 +0000480 PyObject *results, *match_obj;
481 PyObject *repl_obj, *newstring;
482 unsigned char *repl;
483 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000484
Guido van Rossum50700601997-12-08 17:15:20 +0000485 if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj))
486 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000487
Guido van Rossum50700601997-12-08 17:15:20 +0000488 repl=(unsigned char *)PyString_AsString(repl_obj);
489 size=PyString_Size(repl_obj);
490 results=PyList_New(0);
491 if (results==NULL) return NULL;
492 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000493 {
Guido van Rossum50700601997-12-08 17:15:20 +0000494 if (repl[i]=='\\')
495 {
496 PyObject *value;
497 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000498
Guido van Rossum50700601997-12-08 17:15:20 +0000499 if (start!=i)
500 {
501 PyList_Append(results,
502 PyString_FromStringAndSize((char *)repl+start, i-start));
503 total_len += i-start;
504 }
505 i++;
506 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
507 if (value==NULL)
508 {
509 /* PyPcre_expand_escape triggered an exception of some sort,
510 so just return */
511 Py_DECREF(results);
512 return NULL;
513 }
514 switch (escape_type)
515 {
516 case (CHAR):
517 PyList_Append(results, value);
518 total_len += PyString_Size(value);
519 break;
520 case(MEMORY_REFERENCE):
521 {
522 PyObject *r, *tuple, *result;
523 r=PyObject_GetAttrString(match_obj, "group");
524 tuple=PyTuple_New(1);
525 Py_INCREF(value);
526 PyTuple_SetItem(tuple, 0, value);
527 result=PyEval_CallObject(r, tuple);
528 Py_DECREF(r); Py_DECREF(tuple);
529 if (result==NULL)
530 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000531 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000532 Py_DECREF(results);
533 Py_DECREF(value);
534 return NULL;
535 }
536 if (result==Py_None)
537 {
538 char message[50];
539 sprintf(message,
540 "group did not contribute to the match");
541 PyErr_SetString(ErrorObject,
542 message);
543 Py_DECREF(result);
544 Py_DECREF(value);
545 Py_DECREF(results);
546 return NULL;
547 }
548 /* typecheck that it's a string! */
549 if (!PyString_Check(result))
550 {
551 Py_DECREF(results);
552 Py_DECREF(result);
553 PyErr_SetString(ErrorObject,
554 "group() must return a string value for replacement");
555 return NULL;
556 }
557 PyList_Append(results, result);
558 total_len += PyString_Size(result);
559 Py_DECREF(result);
560 }
561 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000562 case(STRING):
563 {
564 PyList_Append(results, value);
565 total_len += PyString_Size(value);
566 break;
567 }
Guido van Rossum50700601997-12-08 17:15:20 +0000568 default:
569 Py_DECREF(results);
570 PyErr_SetString(ErrorObject,
571 "bad escape in replacement");
572 return NULL;
573 }
574 Py_DECREF(value);
575 start=i;
576 i--; /* Decrement now, because the 'for' loop will increment it */
577 }
578 } /* endif repl[i]!='\\' */
579
580 if (start!=i)
581 {
582 PyList_Append(results, PyString_FromStringAndSize((char *)repl+start, i-start));
583 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000584 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000585
Guido van Rossum50700601997-12-08 17:15:20 +0000586 /* Whew! Now we've constructed a list containing various pieces of
587 strings that will make up our final result. So, iterate over
588 the list concatenating them. A new string measuring total_len
589 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000590
Guido van Rossum50700601997-12-08 17:15:20 +0000591 newstring=PyString_FromStringAndSize(NULL, total_len);
592 if (newstring==NULL)
593 {
594 Py_DECREF(results);
595 return NULL;
596 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000597
Guido van Rossum50700601997-12-08 17:15:20 +0000598 repl=(unsigned char *)PyString_AsString(newstring);
599 for (pos=i=0; i<PyList_Size(results); i++)
600 {
601 PyObject *item=PyList_GetItem(results, i);
602 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
603 pos += PyString_Size(item);
604 }
605 Py_DECREF(results);
606 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000607}
608
609
610/* List of functions defined in the module */
611
612static PyMethodDef pcre_methods[] = {
613 {"pcre_compile", PyPcre_compile, 1},
614 {"pcre_expand", PyPcre_expand, 1},
615 {NULL, NULL} /* sentinel */
616};
617
618
619/*
620 * Convenience routine to export an integer value.
621 * For simplicity, errors (which are unlikely anyway) are ignored.
622 */
623
624static void
625insint(d, name, value)
626 PyObject * d;
627 char * name;
628 int value;
629{
630 PyObject *v = PyInt_FromLong((long) value);
631 if (v == NULL) {
632 /* Don't bother reporting this error */
633 PyErr_Clear();
634 }
635 else {
636 PyDict_SetItemString(d, name, v);
637 Py_DECREF(v);
638 }
639}
640
641
642/* Initialization function for the module (*must* be called initpcre) */
643
644void
645initpcre()
646{
647 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000648
649 /* Create the module and add the functions */
650 m = Py_InitModule("pcre", pcre_methods);
651
652 /* Add some symbolic constants to the module */
653 d = PyModule_GetDict(m);
654 ErrorObject = PyString_FromString("pcre.error");
655 PyDict_SetItemString(d, "error", ErrorObject);
656
657 /* Insert the flags */
658 insint(d, "IGNORECASE", PCRE_CASELESS);
659 insint(d, "ANCHORED", PCRE_ANCHORED);
660 insint(d, "MULTILINE", PCRE_MULTILINE);
661 insint(d, "DOTALL", PCRE_DOTALL);
662 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000663 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000664
665 /* Check for errors */
666 if (PyErr_Occurred())
667 Py_FatalError("can't initialize module pcre");
668}
669