blob: c4890dfe7c1f91ba3969348071eb5b436e7a8cd8 [file] [log] [blame]
Guido van Rossum51b3aa31997-10-06 14:43:11 +00001/***********************************************************
Guido van Rossum58132c61997-12-17 00:24:13 +00002Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
Guido van Rossum51b3aa31997-10-06 14:43:11 +00003The Netherlands.
4
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
16
17While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
29
30******************************************************************/
31
32/* Pcre objects */
33
34#include "Python.h"
35
Guido van Rossum58132c61997-12-17 00:24:13 +000036#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +000037#ifndef Py_eval_input
38/* For Python 1.4, graminit.h has to be explicitly included */
39#include "graminit.h"
40#define Py_eval_input eval_input
41#endif
42
43#ifndef FOR_PYTHON
44#define FOR_PYTHON
45#endif
46
47#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000048#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000049
50static PyObject *ErrorObject;
51
52typedef struct {
53 PyObject_HEAD
54 pcre *regex;
55 pcre_extra *regex_extra;
56 int num_groups;
57} PcreObject;
58
59staticforward PyTypeObject Pcre_Type;
60
61#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
62#define NORMAL 0
63#define CHARCLASS 1
64#define REPLACEMENT 2
65
66#define CHAR 0
67#define MEMORY_REFERENCE 1
68#define SYNTAX 2
69#define NOT_SYNTAX 3
70#define SET 4
71#define WORD_BOUNDARY 5
72#define NOT_WORD_BOUNDARY 6
73#define BEGINNING_OF_BUFFER 7
74#define END_OF_BUFFER 8
75
76
77static PcreObject *
78newPcreObject(arg)
79 PyObject *arg;
80{
81 PcreObject *self;
82 self = PyObject_NEW(PcreObject, &Pcre_Type);
83 if (self == NULL)
84 return NULL;
85 self->regex = NULL;
86 self->regex_extra = NULL;
87 return self;
88}
89
90/* Pcre methods */
91
92static void
93PyPcre_dealloc(self)
94 PcreObject *self;
95{
96 if (self->regex) free(self->regex);
97 if (self->regex_extra) free(self->regex_extra);
98 self->regex=NULL;
99 self->regex_extra=NULL;
100 PyMem_DEL(self);
101}
102
103
104static PyObject *
105PyPcre_exec(self, args)
106 PcreObject *self;
107 PyObject *args;
108{
Guido van Rossum50700601997-12-08 17:15:20 +0000109 char *string;
110 int stringlen, pos = 0, options=0, endpos = -1, i, count;
111 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000112 PyObject *list;
113
Guido van Rossum50700601997-12-08 17:15:20 +0000114 if (!PyArg_ParseTuple(args, "s#|iiii", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000115 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +0000116 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000117 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum816671c1998-03-10 04:55:29 +0000118 (char *)string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000119 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +0000120 /* If an error occurred during the match, and an exception was raised,
121 just return NULL and leave the exception alone. The most likely
122 problem to cause this would be running out of memory for
123 the failure stack. */
124 if (PyErr_Occurred())
125 {
126 return NULL;
127 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000128 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
129 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000130 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000131 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
132 PyErr_SetObject(ErrorObject, errval);
133 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000134 return NULL;
135 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000136
137 list=PyList_New(self->num_groups+1);
138 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000139 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000140 {
141 PyObject *v;
142 int start=offsets[i*2], end=offsets[i*2+1];
143 /* If the group wasn't affected by the match, return -1, -1 */
144 if (start<0 || count<=i)
145 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000146 v=Py_BuildValue("ii", start, end);
147 if (v==NULL) {Py_DECREF(list); return NULL;}
148 PyList_SetItem(list, i, v);
149 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000150 return list;
151}
152
153static PyMethodDef Pcre_methods[] = {
154 {"match", (PyCFunction)PyPcre_exec, 1},
155 {NULL, NULL} /* sentinel */
156};
157
158static PyObject *
159PyPcre_getattr(self, name)
160 PcreObject *self;
161 char *name;
162{
163 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
164}
165
166
167staticforward PyTypeObject Pcre_Type = {
168 PyObject_HEAD_INIT(&PyType_Type)
169 0, /*ob_size*/
170 "Pcre", /*tp_name*/
171 sizeof(PcreObject), /*tp_basicsize*/
172 0, /*tp_itemsize*/
173 /* methods */
174 (destructor)PyPcre_dealloc, /*tp_dealloc*/
175 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000176 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000177 0, /*tp_setattr*/
178 0, /*tp_compare*/
179 0, /*tp_repr*/
180 0, /*tp_as_number*/
181 0, /*tp_as_sequence*/
182 0, /*tp_as_mapping*/
183 0, /*tp_hash*/
184};
185/* --------------------------------------------------------------------- */
186
187static PyObject *
188PyPcre_compile(self, args)
189 PyObject *self; /* Not used */
190 PyObject *args;
191{
192 PcreObject *rv;
193 PyObject *dictionary;
Guido van Rossum50700601997-12-08 17:15:20 +0000194 char *pattern, *newpattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000195 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000196 int num_zeros, i, j;
197
198 int patternlen, options, erroroffset;
199 if (!PyArg_ParseTuple(args, "s#iO!", &pattern, &patternlen, &options,
200 &PyDict_Type, &dictionary))
201 return NULL;
202 rv = newPcreObject(args);
203 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000204 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000205
206 /* PCRE doesn't like having null bytes in its pattern, so we have to replace
Guido van Rossum58132c61997-12-17 00:24:13 +0000207 any zeros in the string with the characters '\000'. This increases the size
208 of the string by 3*num_zeros, plus 1 byte for the terminating \0. */
209 num_zeros=1; /* Start at 1; this will give 3 extra bytes of leeway */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000210 for(i=0; i<patternlen; i++) {
Guido van Rossum50700601997-12-08 17:15:20 +0000211 if (pattern[i]==0) num_zeros++;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000212 }
Guido van Rossum58132c61997-12-17 00:24:13 +0000213 newpattern=malloc(patternlen + num_zeros*3 + 4);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000214 if (newpattern==NULL) {
Guido van Rossum50700601997-12-08 17:15:20 +0000215 PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern");
216 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000217 }
218 for (i=j=0; i<patternlen; i++, j++)
Guido van Rossum50700601997-12-08 17:15:20 +0000219 {
220 if (pattern[i]!=0) newpattern[j]=pattern[i];
221 else {
Guido van Rossum58132c61997-12-17 00:24:13 +0000222 newpattern[j++] ='\\';
223 newpattern[j++] = '0';
224 newpattern[j++] = '0';
225 newpattern[j ] = '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000226 }
227 }
Guido van Rossum58132c61997-12-17 00:24:13 +0000228 /* Keep purify happy; for pcre, one null byte is enough! */
229 newpattern[j++]='\0';
230 newpattern[j++]='\0';
231 newpattern[j++]='\0';
Guido van Rossum557dea11997-12-22 22:46:52 +0000232 newpattern[j]='\0';
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000233
Guido van Rossum50700601997-12-08 17:15:20 +0000234 rv->regex = pcre_compile((char*)newpattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000235 &error, &erroroffset, dictionary);
236 free(newpattern);
237 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000238 {
239 PyMem_DEL(rv);
240 if (!PyErr_Occurred())
241 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000242 PyObject *errval = Py_BuildValue("si", error, erroroffset);
243 PyErr_SetObject(ErrorObject, errval);
244 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000245 }
246 return NULL;
247 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000248 rv->regex_extra=pcre_study(rv->regex, 0, &error);
249 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000250 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000251 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossum50700601997-12-08 17:15:20 +0000252 PyMem_DEL(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000253 PyErr_SetObject(ErrorObject, errval);
254 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000255 return NULL;
256 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000257 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
258 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000259 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000260 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
261 PyErr_SetObject(ErrorObject, errval);
262 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000263 PyMem_DEL(rv);
264 return NULL;
265 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000266 return (PyObject *)rv;
267}
268
269static PyObject *
Guido van Rossumc3861071997-10-08 02:07:40 +0000270PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
Guido van Rossum50700601997-12-08 17:15:20 +0000271 unsigned char *pattern;
272 int pattern_len, *indexptr, *typeptr;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000273{
Guido van Rossum50700601997-12-08 17:15:20 +0000274 unsigned char c;
275 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000276
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000277 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000278 {
279 PyErr_SetString(ErrorObject, "escape ends too soon");
280 return NULL;
281 }
282 c=pattern[index]; index++;
283 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000284
Guido van Rossum50700601997-12-08 17:15:20 +0000285 switch (c)
286 {
287 case('t'):
288 *indexptr=index;
289 return Py_BuildValue("c", (char)9);
290 break;
291 case('n'):
292 *indexptr = index;
293 return Py_BuildValue("c", (char)10);
294 break;
295 case('v'):
296 *indexptr = index;
297 return Py_BuildValue("c", (char)11);
298 break;
299 case('r'):
300 *indexptr = index;
301 return Py_BuildValue("c", (char)13);
302 break;
303 case('f'):
304 *indexptr = index;
305 return Py_BuildValue("c", (char)12);
306 break;
307 case('a'):
308 *indexptr = index;
309 return Py_BuildValue("c", (char)7);
310 break;
311 case('b'):
312 *indexptr=index;
313 return Py_BuildValue("c", (char)8);
314 break;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000315
Guido van Rossum50700601997-12-08 17:15:20 +0000316 case('x'):
317 {
318 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000319
Guido van Rossum50700601997-12-08 17:15:20 +0000320 x = 0; end = index;
321 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
322 {
323 ch = pattern[end];
324 x = x * 16 + pcre_lcc[ch] -
325 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
326 x &= 255;
327 end++;
328 }
329 if (end==index)
330 {
331 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
332 return NULL;
333 }
334 *indexptr = end;
335 return Py_BuildValue("c", (char)x);
336 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000337 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000338
339 case('E'): case('G'): case('L'): case('Q'):
340 case('U'): case('l'): case('u'):
341 {
342 char message[50];
343 sprintf(message, "\\%c is not allowed", c);
344 PyErr_SetString(ErrorObject, message);
345 return NULL;
346 }
347
348 case('g'):
349 {
350 int end, i;
351 if (pattern_len<=index)
352 {
353 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
354 return NULL;
355 }
356 if (pattern[index]!='<')
357 {
358 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
359 return NULL;
360 }
361 index++;
362 end=index;
363 while (end<pattern_len && pattern[end]!='>')
364 end++;
365 if (end==pattern_len)
366 {
367 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
368 return NULL;
369 }
370
371 if (index==end) /* Zero-length name */
372 {
373 /* XXX should include the text of the reference */
374 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
375 return NULL;
376 }
377 if (!(pcre_ctypes[pattern[index]] & ctype_word) /* First char. not alphanumeric */
378 || (pcre_ctypes[pattern[index]] & ctype_digit) ) /* First char. a digit */
379 {
380 /* XXX should include the text of the reference */
381 PyErr_SetString(ErrorObject, "first character of symbolic reference not a letter or _");
382 return NULL;
383 }
384
385 for(i=index+1; i<end; i++)
386 {
387 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
388 {
389 /* XXX should include the text of the reference */
390 PyErr_SetString(ErrorObject, "illegal symbolic reference");
391 return NULL;
392 }
393 }
394
395 *typeptr = MEMORY_REFERENCE;
396 *indexptr = end+1;
397 return Py_BuildValue("s#", pattern+index, end-index);
398 }
399 break;
400
401 case('0'):
402 {
403 /* \0 always indicates an octal escape, so we consume up to 3
404 characters, as long as they're all octal digits */
405 int octval=0, i;
406 index--;
407 for(i=index;
408 i<=index+2 && i<pattern_len
409 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
410 i++)
411 {
412 octval = octval * 8 + pattern[i] - '0';
413 }
414 if (octval>255)
415 {
416 PyErr_SetString(ErrorObject, "octal value out of range");
417 return NULL;
418 }
419 *indexptr = i;
420 return Py_BuildValue("c", (unsigned char)octval);
421 }
422 break;
423 case('1'): case('2'): case('3'): case('4'):
424 case('5'): case('6'): case('7'): case('8'):
425 case('9'):
426 {
427 /* Handle \?, where ? is from 1 through 9 */
428 int value=0;
429 index--;
430 /* If it's at least a two-digit reference, like \34, it might
431 either be a 3-digit octal escape (\123) or a 2-digit
432 decimal memory reference (\34) */
433
434 if ( (index+1) <pattern_len &&
435 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
436 {
437 if ( (index+2) <pattern_len &&
438 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
439 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
440 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
441 )
442 {
443 /* 3 octal digits */
444 value= 8*8*(pattern[index ]-'0') +
445 8*(pattern[index+1]-'0') +
446 (pattern[index+2]-'0');
447 if (value>255)
448 {
449 PyErr_SetString(ErrorObject, "octal value out of range");
450 return NULL;
451 }
452 *indexptr = index+3;
453 return Py_BuildValue("c", (unsigned char)value);
454 }
455 else
456 {
457 /* 2-digit form, so it's a memory reference */
458 value= 10*(pattern[index ]-'0') +
459 (pattern[index+1]-'0');
460 if (value<1 || EXTRACT_MAX<=value)
461 {
462 PyErr_SetString(ErrorObject, "memory reference out of range");
463 return NULL;
464 }
465 *typeptr = MEMORY_REFERENCE;
466 *indexptr = index+2;
467 return Py_BuildValue("i", value);
468 }
469 }
470 else
471 {
472 /* Single-digit form, like \2, so it's a memory reference */
473 *typeptr = MEMORY_REFERENCE;
474 *indexptr = index+1;
475 return Py_BuildValue("i", pattern[index]-'0');
476 }
477 }
478 break;
479
480 default:
481 *indexptr = index;
482 return Py_BuildValue("c", c);
483 break;
484 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000485}
486
487static PyObject *
488PyPcre_expand(self, args)
489 PyObject *self;
490 PyObject *args;
491{
Guido van Rossum50700601997-12-08 17:15:20 +0000492 PyObject *results, *match_obj;
493 PyObject *repl_obj, *newstring;
494 unsigned char *repl;
495 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000496
Guido van Rossum50700601997-12-08 17:15:20 +0000497 if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj))
498 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000499
Guido van Rossum50700601997-12-08 17:15:20 +0000500 repl=(unsigned char *)PyString_AsString(repl_obj);
501 size=PyString_Size(repl_obj);
502 results=PyList_New(0);
503 if (results==NULL) return NULL;
504 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000505 {
Guido van Rossum50700601997-12-08 17:15:20 +0000506 if (repl[i]=='\\')
507 {
508 PyObject *value;
509 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000510
Guido van Rossum50700601997-12-08 17:15:20 +0000511 if (start!=i)
512 {
513 PyList_Append(results,
514 PyString_FromStringAndSize((char *)repl+start, i-start));
515 total_len += i-start;
516 }
517 i++;
518 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
519 if (value==NULL)
520 {
521 /* PyPcre_expand_escape triggered an exception of some sort,
522 so just return */
523 Py_DECREF(results);
524 return NULL;
525 }
526 switch (escape_type)
527 {
528 case (CHAR):
529 PyList_Append(results, value);
530 total_len += PyString_Size(value);
531 break;
532 case(MEMORY_REFERENCE):
533 {
534 PyObject *r, *tuple, *result;
535 r=PyObject_GetAttrString(match_obj, "group");
536 tuple=PyTuple_New(1);
537 Py_INCREF(value);
538 PyTuple_SetItem(tuple, 0, value);
539 result=PyEval_CallObject(r, tuple);
540 Py_DECREF(r); Py_DECREF(tuple);
541 if (result==NULL)
542 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000543 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000544 Py_DECREF(results);
545 Py_DECREF(value);
546 return NULL;
547 }
548 if (result==Py_None)
549 {
550 char message[50];
551 sprintf(message,
552 "group did not contribute to the match");
553 PyErr_SetString(ErrorObject,
554 message);
555 Py_DECREF(result);
556 Py_DECREF(value);
557 Py_DECREF(results);
558 return NULL;
559 }
560 /* typecheck that it's a string! */
561 if (!PyString_Check(result))
562 {
563 Py_DECREF(results);
564 Py_DECREF(result);
565 PyErr_SetString(ErrorObject,
566 "group() must return a string value for replacement");
567 return NULL;
568 }
569 PyList_Append(results, result);
570 total_len += PyString_Size(result);
571 Py_DECREF(result);
572 }
573 break;
574 default:
575 Py_DECREF(results);
576 PyErr_SetString(ErrorObject,
577 "bad escape in replacement");
578 return NULL;
579 }
580 Py_DECREF(value);
581 start=i;
582 i--; /* Decrement now, because the 'for' loop will increment it */
583 }
584 } /* endif repl[i]!='\\' */
585
586 if (start!=i)
587 {
588 PyList_Append(results, PyString_FromStringAndSize((char *)repl+start, i-start));
589 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000590 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000591
Guido van Rossum50700601997-12-08 17:15:20 +0000592 /* Whew! Now we've constructed a list containing various pieces of
593 strings that will make up our final result. So, iterate over
594 the list concatenating them. A new string measuring total_len
595 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000596
Guido van Rossum50700601997-12-08 17:15:20 +0000597 newstring=PyString_FromStringAndSize(NULL, total_len);
598 if (newstring==NULL)
599 {
600 Py_DECREF(results);
601 return NULL;
602 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000603
Guido van Rossum50700601997-12-08 17:15:20 +0000604 repl=(unsigned char *)PyString_AsString(newstring);
605 for (pos=i=0; i<PyList_Size(results); i++)
606 {
607 PyObject *item=PyList_GetItem(results, i);
608 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
609 pos += PyString_Size(item);
610 }
611 Py_DECREF(results);
612 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000613}
614
615
616/* List of functions defined in the module */
617
618static PyMethodDef pcre_methods[] = {
619 {"pcre_compile", PyPcre_compile, 1},
620 {"pcre_expand", PyPcre_expand, 1},
621 {NULL, NULL} /* sentinel */
622};
623
624
625/*
626 * Convenience routine to export an integer value.
627 * For simplicity, errors (which are unlikely anyway) are ignored.
628 */
629
630static void
631insint(d, name, value)
632 PyObject * d;
633 char * name;
634 int value;
635{
636 PyObject *v = PyInt_FromLong((long) value);
637 if (v == NULL) {
638 /* Don't bother reporting this error */
639 PyErr_Clear();
640 }
641 else {
642 PyDict_SetItemString(d, name, v);
643 Py_DECREF(v);
644 }
645}
646
647
648/* Initialization function for the module (*must* be called initpcre) */
649
650void
651initpcre()
652{
653 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000654
655 /* Create the module and add the functions */
656 m = Py_InitModule("pcre", pcre_methods);
657
658 /* Add some symbolic constants to the module */
659 d = PyModule_GetDict(m);
660 ErrorObject = PyString_FromString("pcre.error");
661 PyDict_SetItemString(d, "error", ErrorObject);
662
663 /* Insert the flags */
664 insint(d, "IGNORECASE", PCRE_CASELESS);
665 insint(d, "ANCHORED", PCRE_ANCHORED);
666 insint(d, "MULTILINE", PCRE_MULTILINE);
667 insint(d, "DOTALL", PCRE_DOTALL);
668 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000669 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000670
671 /* Check for errors */
672 if (PyErr_Occurred())
673 Py_FatalError("can't initialize module pcre");
674}
675