blob: 2587fa0295c64287468f329adff299fea1a87943 [file] [log] [blame]
Guido van Rossum51b3aa31997-10-06 14:43:11 +00001/***********************************************************
Guido van Rossum58132c61997-12-17 00:24:13 +00002Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
Guido van Rossum51b3aa31997-10-06 14:43:11 +00003The Netherlands.
4
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
16
17While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
29
30******************************************************************/
31
32/* Pcre objects */
33
34#include "Python.h"
35
Guido van Rossum58132c61997-12-17 00:24:13 +000036#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +000037#ifndef Py_eval_input
38/* For Python 1.4, graminit.h has to be explicitly included */
39#include "graminit.h"
40#define Py_eval_input eval_input
41#endif
42
43#ifndef FOR_PYTHON
44#define FOR_PYTHON
45#endif
46
47#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000048#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000049
50static PyObject *ErrorObject;
51
52typedef struct {
53 PyObject_HEAD
54 pcre *regex;
55 pcre_extra *regex_extra;
56 int num_groups;
57} PcreObject;
58
59staticforward PyTypeObject Pcre_Type;
60
61#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
62#define NORMAL 0
63#define CHARCLASS 1
64#define REPLACEMENT 2
65
66#define CHAR 0
67#define MEMORY_REFERENCE 1
68#define SYNTAX 2
69#define NOT_SYNTAX 3
70#define SET 4
71#define WORD_BOUNDARY 5
72#define NOT_WORD_BOUNDARY 6
73#define BEGINNING_OF_BUFFER 7
74#define END_OF_BUFFER 8
75
76
77static PcreObject *
78newPcreObject(arg)
79 PyObject *arg;
80{
81 PcreObject *self;
82 self = PyObject_NEW(PcreObject, &Pcre_Type);
83 if (self == NULL)
84 return NULL;
85 self->regex = NULL;
86 self->regex_extra = NULL;
87 return self;
88}
89
90/* Pcre methods */
91
92static void
93PyPcre_dealloc(self)
94 PcreObject *self;
95{
96 if (self->regex) free(self->regex);
97 if (self->regex_extra) free(self->regex_extra);
98 self->regex=NULL;
99 self->regex_extra=NULL;
100 PyMem_DEL(self);
101}
102
103
104static PyObject *
105PyPcre_exec(self, args)
106 PcreObject *self;
107 PyObject *args;
108{
Guido van Rossum50700601997-12-08 17:15:20 +0000109 char *string;
110 int stringlen, pos = 0, options=0, endpos = -1, i, count;
111 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000112 PyObject *list;
113
Guido van Rossum50700601997-12-08 17:15:20 +0000114 if (!PyArg_ParseTuple(args, "s#|iiii", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000115 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +0000116 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000117 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum50700601997-12-08 17:15:20 +0000118 (char *)string+pos, endpos - pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000119 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +0000120 /* If an error occurred during the match, and an exception was raised,
121 just return NULL and leave the exception alone. The most likely
122 problem to cause this would be running out of memory for
123 the failure stack. */
124 if (PyErr_Occurred())
125 {
126 return NULL;
127 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000128 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
129 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000130 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000131 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
132 PyErr_SetObject(ErrorObject, errval);
133 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000134 return NULL;
135 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000136
137 list=PyList_New(self->num_groups+1);
138 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000139 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000140 {
141 PyObject *v;
142 int start=offsets[i*2], end=offsets[i*2+1];
143 /* If the group wasn't affected by the match, return -1, -1 */
144 if (start<0 || count<=i)
145 {start=end=-1;}
146 else
147 {start += pos; end +=pos;}
148 v=Py_BuildValue("ii", start, end);
149 if (v==NULL) {Py_DECREF(list); return NULL;}
150 PyList_SetItem(list, i, v);
151 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000152 return list;
153}
154
155static PyMethodDef Pcre_methods[] = {
156 {"match", (PyCFunction)PyPcre_exec, 1},
157 {NULL, NULL} /* sentinel */
158};
159
160static PyObject *
161PyPcre_getattr(self, name)
162 PcreObject *self;
163 char *name;
164{
165 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
166}
167
168
169staticforward PyTypeObject Pcre_Type = {
170 PyObject_HEAD_INIT(&PyType_Type)
171 0, /*ob_size*/
172 "Pcre", /*tp_name*/
173 sizeof(PcreObject), /*tp_basicsize*/
174 0, /*tp_itemsize*/
175 /* methods */
176 (destructor)PyPcre_dealloc, /*tp_dealloc*/
177 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000178 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000179 0, /*tp_setattr*/
180 0, /*tp_compare*/
181 0, /*tp_repr*/
182 0, /*tp_as_number*/
183 0, /*tp_as_sequence*/
184 0, /*tp_as_mapping*/
185 0, /*tp_hash*/
186};
187/* --------------------------------------------------------------------- */
188
189static PyObject *
190PyPcre_compile(self, args)
191 PyObject *self; /* Not used */
192 PyObject *args;
193{
194 PcreObject *rv;
195 PyObject *dictionary;
Guido van Rossum50700601997-12-08 17:15:20 +0000196 char *pattern, *newpattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000197 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000198 int num_zeros, i, j;
199
200 int patternlen, options, erroroffset;
201 if (!PyArg_ParseTuple(args, "s#iO!", &pattern, &patternlen, &options,
202 &PyDict_Type, &dictionary))
203 return NULL;
204 rv = newPcreObject(args);
205 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000206 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000207
208 /* PCRE doesn't like having null bytes in its pattern, so we have to replace
Guido van Rossum58132c61997-12-17 00:24:13 +0000209 any zeros in the string with the characters '\000'. This increases the size
210 of the string by 3*num_zeros, plus 1 byte for the terminating \0. */
211 num_zeros=1; /* Start at 1; this will give 3 extra bytes of leeway */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000212 for(i=0; i<patternlen; i++) {
Guido van Rossum50700601997-12-08 17:15:20 +0000213 if (pattern[i]==0) num_zeros++;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000214 }
Guido van Rossum58132c61997-12-17 00:24:13 +0000215 newpattern=malloc(patternlen + num_zeros*3 + 4);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000216 if (newpattern==NULL) {
Guido van Rossum50700601997-12-08 17:15:20 +0000217 PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern");
218 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000219 }
220 for (i=j=0; i<patternlen; i++, j++)
Guido van Rossum50700601997-12-08 17:15:20 +0000221 {
222 if (pattern[i]!=0) newpattern[j]=pattern[i];
223 else {
Guido van Rossum58132c61997-12-17 00:24:13 +0000224 newpattern[j++] ='\\';
225 newpattern[j++] = '0';
226 newpattern[j++] = '0';
227 newpattern[j ] = '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000228 }
229 }
Guido van Rossum58132c61997-12-17 00:24:13 +0000230 /* Keep purify happy; for pcre, one null byte is enough! */
231 newpattern[j++]='\0';
232 newpattern[j++]='\0';
233 newpattern[j++]='\0';
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000234 newpattern[j]='\0';
235
Guido van Rossum50700601997-12-08 17:15:20 +0000236 rv->regex = pcre_compile((char*)newpattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000237 &error, &erroroffset, dictionary);
238 free(newpattern);
239 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000240 {
241 PyMem_DEL(rv);
242 if (!PyErr_Occurred())
243 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000244 PyObject *errval = Py_BuildValue("si", error, erroroffset);
245 PyErr_SetObject(ErrorObject, errval);
246 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000247 }
248 return NULL;
249 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000250 rv->regex_extra=pcre_study(rv->regex, 0, &error);
251 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000252 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000253 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossum50700601997-12-08 17:15:20 +0000254 PyMem_DEL(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000255 PyErr_SetObject(ErrorObject, errval);
256 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000257 return NULL;
258 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000259 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
260 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000261 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000262 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
263 PyErr_SetObject(ErrorObject, errval);
264 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000265 PyMem_DEL(rv);
266 return NULL;
267 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000268 return (PyObject *)rv;
269}
270
271static PyObject *
Guido van Rossumc3861071997-10-08 02:07:40 +0000272PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
Guido van Rossum50700601997-12-08 17:15:20 +0000273 unsigned char *pattern;
274 int pattern_len, *indexptr, *typeptr;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000275{
Guido van Rossum50700601997-12-08 17:15:20 +0000276 unsigned char c;
277 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000278
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000279 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000280 {
281 PyErr_SetString(ErrorObject, "escape ends too soon");
282 return NULL;
283 }
284 c=pattern[index]; index++;
285 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000286
Guido van Rossum50700601997-12-08 17:15:20 +0000287 switch (c)
288 {
289 case('t'):
290 *indexptr=index;
291 return Py_BuildValue("c", (char)9);
292 break;
293 case('n'):
294 *indexptr = index;
295 return Py_BuildValue("c", (char)10);
296 break;
297 case('v'):
298 *indexptr = index;
299 return Py_BuildValue("c", (char)11);
300 break;
301 case('r'):
302 *indexptr = index;
303 return Py_BuildValue("c", (char)13);
304 break;
305 case('f'):
306 *indexptr = index;
307 return Py_BuildValue("c", (char)12);
308 break;
309 case('a'):
310 *indexptr = index;
311 return Py_BuildValue("c", (char)7);
312 break;
313 case('b'):
314 *indexptr=index;
315 return Py_BuildValue("c", (char)8);
316 break;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000317
Guido van Rossum50700601997-12-08 17:15:20 +0000318 case('x'):
319 {
320 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000321
Guido van Rossum50700601997-12-08 17:15:20 +0000322 x = 0; end = index;
323 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
324 {
325 ch = pattern[end];
326 x = x * 16 + pcre_lcc[ch] -
327 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
328 x &= 255;
329 end++;
330 }
331 if (end==index)
332 {
333 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
334 return NULL;
335 }
336 *indexptr = end;
337 return Py_BuildValue("c", (char)x);
338 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000339 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000340
341 case('E'): case('G'): case('L'): case('Q'):
342 case('U'): case('l'): case('u'):
343 {
344 char message[50];
345 sprintf(message, "\\%c is not allowed", c);
346 PyErr_SetString(ErrorObject, message);
347 return NULL;
348 }
349
350 case('g'):
351 {
352 int end, i;
353 if (pattern_len<=index)
354 {
355 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
356 return NULL;
357 }
358 if (pattern[index]!='<')
359 {
360 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
361 return NULL;
362 }
363 index++;
364 end=index;
365 while (end<pattern_len && pattern[end]!='>')
366 end++;
367 if (end==pattern_len)
368 {
369 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
370 return NULL;
371 }
372
373 if (index==end) /* Zero-length name */
374 {
375 /* XXX should include the text of the reference */
376 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
377 return NULL;
378 }
379 if (!(pcre_ctypes[pattern[index]] & ctype_word) /* First char. not alphanumeric */
380 || (pcre_ctypes[pattern[index]] & ctype_digit) ) /* First char. a digit */
381 {
382 /* XXX should include the text of the reference */
383 PyErr_SetString(ErrorObject, "first character of symbolic reference not a letter or _");
384 return NULL;
385 }
386
387 for(i=index+1; i<end; i++)
388 {
389 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
390 {
391 /* XXX should include the text of the reference */
392 PyErr_SetString(ErrorObject, "illegal symbolic reference");
393 return NULL;
394 }
395 }
396
397 *typeptr = MEMORY_REFERENCE;
398 *indexptr = end+1;
399 return Py_BuildValue("s#", pattern+index, end-index);
400 }
401 break;
402
403 case('0'):
404 {
405 /* \0 always indicates an octal escape, so we consume up to 3
406 characters, as long as they're all octal digits */
407 int octval=0, i;
408 index--;
409 for(i=index;
410 i<=index+2 && i<pattern_len
411 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
412 i++)
413 {
414 octval = octval * 8 + pattern[i] - '0';
415 }
416 if (octval>255)
417 {
418 PyErr_SetString(ErrorObject, "octal value out of range");
419 return NULL;
420 }
421 *indexptr = i;
422 return Py_BuildValue("c", (unsigned char)octval);
423 }
424 break;
425 case('1'): case('2'): case('3'): case('4'):
426 case('5'): case('6'): case('7'): case('8'):
427 case('9'):
428 {
429 /* Handle \?, where ? is from 1 through 9 */
430 int value=0;
431 index--;
432 /* If it's at least a two-digit reference, like \34, it might
433 either be a 3-digit octal escape (\123) or a 2-digit
434 decimal memory reference (\34) */
435
436 if ( (index+1) <pattern_len &&
437 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
438 {
439 if ( (index+2) <pattern_len &&
440 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
441 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
442 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
443 )
444 {
445 /* 3 octal digits */
446 value= 8*8*(pattern[index ]-'0') +
447 8*(pattern[index+1]-'0') +
448 (pattern[index+2]-'0');
449 if (value>255)
450 {
451 PyErr_SetString(ErrorObject, "octal value out of range");
452 return NULL;
453 }
454 *indexptr = index+3;
455 return Py_BuildValue("c", (unsigned char)value);
456 }
457 else
458 {
459 /* 2-digit form, so it's a memory reference */
460 value= 10*(pattern[index ]-'0') +
461 (pattern[index+1]-'0');
462 if (value<1 || EXTRACT_MAX<=value)
463 {
464 PyErr_SetString(ErrorObject, "memory reference out of range");
465 return NULL;
466 }
467 *typeptr = MEMORY_REFERENCE;
468 *indexptr = index+2;
469 return Py_BuildValue("i", value);
470 }
471 }
472 else
473 {
474 /* Single-digit form, like \2, so it's a memory reference */
475 *typeptr = MEMORY_REFERENCE;
476 *indexptr = index+1;
477 return Py_BuildValue("i", pattern[index]-'0');
478 }
479 }
480 break;
481
482 default:
483 *indexptr = index;
484 return Py_BuildValue("c", c);
485 break;
486 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000487}
488
489static PyObject *
490PyPcre_expand(self, args)
491 PyObject *self;
492 PyObject *args;
493{
Guido van Rossum50700601997-12-08 17:15:20 +0000494 PyObject *results, *match_obj;
495 PyObject *repl_obj, *newstring;
496 unsigned char *repl;
497 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000498
Guido van Rossum50700601997-12-08 17:15:20 +0000499 if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj))
500 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000501
Guido van Rossum50700601997-12-08 17:15:20 +0000502 repl=(unsigned char *)PyString_AsString(repl_obj);
503 size=PyString_Size(repl_obj);
504 results=PyList_New(0);
505 if (results==NULL) return NULL;
506 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000507 {
Guido van Rossum50700601997-12-08 17:15:20 +0000508 if (repl[i]=='\\')
509 {
510 PyObject *value;
511 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000512
Guido van Rossum50700601997-12-08 17:15:20 +0000513 if (start!=i)
514 {
515 PyList_Append(results,
516 PyString_FromStringAndSize((char *)repl+start, i-start));
517 total_len += i-start;
518 }
519 i++;
520 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
521 if (value==NULL)
522 {
523 /* PyPcre_expand_escape triggered an exception of some sort,
524 so just return */
525 Py_DECREF(results);
526 return NULL;
527 }
528 switch (escape_type)
529 {
530 case (CHAR):
531 PyList_Append(results, value);
532 total_len += PyString_Size(value);
533 break;
534 case(MEMORY_REFERENCE):
535 {
536 PyObject *r, *tuple, *result;
537 r=PyObject_GetAttrString(match_obj, "group");
538 tuple=PyTuple_New(1);
539 Py_INCREF(value);
540 PyTuple_SetItem(tuple, 0, value);
541 result=PyEval_CallObject(r, tuple);
542 Py_DECREF(r); Py_DECREF(tuple);
543 if (result==NULL)
544 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000545 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000546 Py_DECREF(results);
547 Py_DECREF(value);
548 return NULL;
549 }
550 if (result==Py_None)
551 {
552 char message[50];
553 sprintf(message,
554 "group did not contribute to the match");
555 PyErr_SetString(ErrorObject,
556 message);
557 Py_DECREF(result);
558 Py_DECREF(value);
559 Py_DECREF(results);
560 return NULL;
561 }
562 /* typecheck that it's a string! */
563 if (!PyString_Check(result))
564 {
565 Py_DECREF(results);
566 Py_DECREF(result);
567 PyErr_SetString(ErrorObject,
568 "group() must return a string value for replacement");
569 return NULL;
570 }
571 PyList_Append(results, result);
572 total_len += PyString_Size(result);
573 Py_DECREF(result);
574 }
575 break;
576 default:
577 Py_DECREF(results);
578 PyErr_SetString(ErrorObject,
579 "bad escape in replacement");
580 return NULL;
581 }
582 Py_DECREF(value);
583 start=i;
584 i--; /* Decrement now, because the 'for' loop will increment it */
585 }
586 } /* endif repl[i]!='\\' */
587
588 if (start!=i)
589 {
590 PyList_Append(results, PyString_FromStringAndSize((char *)repl+start, i-start));
591 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000592 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000593
Guido van Rossum50700601997-12-08 17:15:20 +0000594 /* Whew! Now we've constructed a list containing various pieces of
595 strings that will make up our final result. So, iterate over
596 the list concatenating them. A new string measuring total_len
597 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000598
Guido van Rossum50700601997-12-08 17:15:20 +0000599 newstring=PyString_FromStringAndSize(NULL, total_len);
600 if (newstring==NULL)
601 {
602 Py_DECREF(results);
603 return NULL;
604 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000605
Guido van Rossum50700601997-12-08 17:15:20 +0000606 repl=(unsigned char *)PyString_AsString(newstring);
607 for (pos=i=0; i<PyList_Size(results); i++)
608 {
609 PyObject *item=PyList_GetItem(results, i);
610 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
611 pos += PyString_Size(item);
612 }
613 Py_DECREF(results);
614 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000615}
616
617
618/* List of functions defined in the module */
619
620static PyMethodDef pcre_methods[] = {
621 {"pcre_compile", PyPcre_compile, 1},
622 {"pcre_expand", PyPcre_expand, 1},
623 {NULL, NULL} /* sentinel */
624};
625
626
627/*
628 * Convenience routine to export an integer value.
629 * For simplicity, errors (which are unlikely anyway) are ignored.
630 */
631
632static void
633insint(d, name, value)
634 PyObject * d;
635 char * name;
636 int value;
637{
638 PyObject *v = PyInt_FromLong((long) value);
639 if (v == NULL) {
640 /* Don't bother reporting this error */
641 PyErr_Clear();
642 }
643 else {
644 PyDict_SetItemString(d, name, v);
645 Py_DECREF(v);
646 }
647}
648
649
650/* Initialization function for the module (*must* be called initpcre) */
651
652void
653initpcre()
654{
655 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000656
657 /* Create the module and add the functions */
658 m = Py_InitModule("pcre", pcre_methods);
659
660 /* Add some symbolic constants to the module */
661 d = PyModule_GetDict(m);
662 ErrorObject = PyString_FromString("pcre.error");
663 PyDict_SetItemString(d, "error", ErrorObject);
664
665 /* Insert the flags */
666 insint(d, "IGNORECASE", PCRE_CASELESS);
667 insint(d, "ANCHORED", PCRE_ANCHORED);
668 insint(d, "MULTILINE", PCRE_MULTILINE);
669 insint(d, "DOTALL", PCRE_DOTALL);
670 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000671 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000672
673 /* Check for errors */
674 if (PyErr_Occurred())
675 Py_FatalError("can't initialize module pcre");
676}
677