blob: 07a36fa6b2cc26bd3696263cf9e2d104f4866f49 [file] [log] [blame]
Guido van Rossum51b3aa31997-10-06 14:43:11 +00001/***********************************************************
Guido van Rossum58132c61997-12-17 00:24:13 +00002Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
Guido van Rossum51b3aa31997-10-06 14:43:11 +00003The Netherlands.
4
5 All Rights Reserved
6
7Permission to use, copy, modify, and distribute this software and its
8documentation for any purpose and without fee is hereby granted,
9provided that the above copyright notice appear in all copies and that
10both that copyright notice and this permission notice appear in
11supporting documentation, and that the names of Stichting Mathematisch
12Centrum or CWI or Corporation for National Research Initiatives or
13CNRI not be used in advertising or publicity pertaining to
14distribution of the software without specific, written prior
15permission.
16
17While CWI is the initial source for this software, a modified version
18is made available by the Corporation for National Research Initiatives
19(CNRI) at the Internet address ftp://ftp.python.org.
20
21STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28PERFORMANCE OF THIS SOFTWARE.
29
30******************************************************************/
31
/* Pcre objects */
33
34#include "Python.h"
35
Guido van Rossum58132c61997-12-17 00:24:13 +000036#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +000037#ifndef Py_eval_input
38/* For Python 1.4, graminit.h has to be explicitly included */
39#include "graminit.h"
40#define Py_eval_input eval_input
41#endif
42
43#ifndef FOR_PYTHON
44#define FOR_PYTHON
45#endif
46
47#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000048#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000049
50static PyObject *ErrorObject;
51
52typedef struct {
53 PyObject_HEAD
54 pcre *regex;
55 pcre_extra *regex_extra;
56 int num_groups;
57} PcreObject;
58
59staticforward PyTypeObject Pcre_Type;
60
61#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
/* Parsing contexts (not referenced by the functions in this part of
   the file). */
#define NORMAL			0
#define CHARCLASS		1
#define REPLACEMENT		2

/* Escape classifications.  PyPcre_expand_escape reports CHAR,
   MEMORY_REFERENCE or STRING through its typeptr argument; the
   remaining codes are not produced by the code visible here. */
#define CHAR			0
#define MEMORY_REFERENCE	1
#define SYNTAX			2
#define NOT_SYNTAX		3
#define SET			4
#define WORD_BOUNDARY		5
#define NOT_WORD_BOUNDARY	6
#define BEGINNING_OF_BUFFER	7
#define END_OF_BUFFER		8
#define STRING			9

Guido van Rossum51b3aa31997-10-06 14:43:11 +000076
77static PcreObject *
78newPcreObject(arg)
79 PyObject *arg;
80{
81 PcreObject *self;
82 self = PyObject_NEW(PcreObject, &Pcre_Type);
83 if (self == NULL)
84 return NULL;
85 self->regex = NULL;
86 self->regex_extra = NULL;
87 return self;
88}
89
/* Pcre methods */
91
92static void
93PyPcre_dealloc(self)
94 PcreObject *self;
95{
96 if (self->regex) free(self->regex);
97 if (self->regex_extra) free(self->regex_extra);
98 self->regex=NULL;
99 self->regex_extra=NULL;
100 PyMem_DEL(self);
101}
102
103
104static PyObject *
105PyPcre_exec(self, args)
106 PcreObject *self;
107 PyObject *args;
108{
Guido van Rossum50700601997-12-08 17:15:20 +0000109 char *string;
110 int stringlen, pos = 0, options=0, endpos = -1, i, count;
111 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000112 PyObject *list;
113
Guido van Rossum50700601997-12-08 17:15:20 +0000114 if (!PyArg_ParseTuple(args, "s#|iiii", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000115 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +0000116 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000117 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum816671c1998-03-10 04:55:29 +0000118 (char *)string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000119 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +0000120 /* If an error occurred during the match, and an exception was raised,
121 just return NULL and leave the exception alone. The most likely
122 problem to cause this would be running out of memory for
123 the failure stack. */
124 if (PyErr_Occurred())
125 {
126 return NULL;
127 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000128 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
129 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000130 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000131 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
132 PyErr_SetObject(ErrorObject, errval);
133 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000134 return NULL;
135 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000136
137 list=PyList_New(self->num_groups+1);
138 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000139 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000140 {
141 PyObject *v;
142 int start=offsets[i*2], end=offsets[i*2+1];
143 /* If the group wasn't affected by the match, return -1, -1 */
144 if (start<0 || count<=i)
145 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000146 v=Py_BuildValue("ii", start, end);
147 if (v==NULL) {Py_DECREF(list); return NULL;}
148 PyList_SetItem(list, i, v);
149 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000150 return list;
151}
152
153static PyMethodDef Pcre_methods[] = {
154 {"match", (PyCFunction)PyPcre_exec, 1},
155 {NULL, NULL} /* sentinel */
156};
157
158static PyObject *
159PyPcre_getattr(self, name)
160 PcreObject *self;
161 char *name;
162{
163 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
164}
165
166
167staticforward PyTypeObject Pcre_Type = {
168 PyObject_HEAD_INIT(&PyType_Type)
169 0, /*ob_size*/
170 "Pcre", /*tp_name*/
171 sizeof(PcreObject), /*tp_basicsize*/
172 0, /*tp_itemsize*/
173 /* methods */
174 (destructor)PyPcre_dealloc, /*tp_dealloc*/
175 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000176 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000177 0, /*tp_setattr*/
178 0, /*tp_compare*/
179 0, /*tp_repr*/
180 0, /*tp_as_number*/
181 0, /*tp_as_sequence*/
182 0, /*tp_as_mapping*/
183 0, /*tp_hash*/
184};
185/* --------------------------------------------------------------------- */
186
187static PyObject *
188PyPcre_compile(self, args)
189 PyObject *self; /* Not used */
190 PyObject *args;
191{
192 PcreObject *rv;
193 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000194 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000195 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000196 int num_zeros, i, j;
197
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000198 int options, erroroffset;
199 if (!PyArg_ParseTuple(args, "siO!", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000200 &PyDict_Type, &dictionary))
201 return NULL;
202 rv = newPcreObject(args);
203 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000204 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000205
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000206 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000207 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000208 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000209 {
210 PyMem_DEL(rv);
211 if (!PyErr_Occurred())
212 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000213 PyObject *errval = Py_BuildValue("si", error, erroroffset);
214 PyErr_SetObject(ErrorObject, errval);
215 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000216 }
217 return NULL;
218 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000219 rv->regex_extra=pcre_study(rv->regex, 0, &error);
220 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000221 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000222 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossum50700601997-12-08 17:15:20 +0000223 PyMem_DEL(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000224 PyErr_SetObject(ErrorObject, errval);
225 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000226 return NULL;
227 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000228 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
229 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000230 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000231 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
232 PyErr_SetObject(ErrorObject, errval);
233 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000234 PyMem_DEL(rv);
235 return NULL;
236 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000237 return (PyObject *)rv;
238}
239
240static PyObject *
Guido van Rossumc3861071997-10-08 02:07:40 +0000241PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
Guido van Rossum50700601997-12-08 17:15:20 +0000242 unsigned char *pattern;
243 int pattern_len, *indexptr, *typeptr;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000244{
Guido van Rossum50700601997-12-08 17:15:20 +0000245 unsigned char c;
246 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000247
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000248 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000249 {
250 PyErr_SetString(ErrorObject, "escape ends too soon");
251 return NULL;
252 }
253 c=pattern[index]; index++;
254 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000255
Guido van Rossum50700601997-12-08 17:15:20 +0000256 switch (c)
257 {
258 case('t'):
259 *indexptr=index;
260 return Py_BuildValue("c", (char)9);
261 break;
262 case('n'):
263 *indexptr = index;
264 return Py_BuildValue("c", (char)10);
265 break;
266 case('v'):
267 *indexptr = index;
268 return Py_BuildValue("c", (char)11);
269 break;
270 case('r'):
271 *indexptr = index;
272 return Py_BuildValue("c", (char)13);
273 break;
274 case('f'):
275 *indexptr = index;
276 return Py_BuildValue("c", (char)12);
277 break;
278 case('a'):
279 *indexptr = index;
280 return Py_BuildValue("c", (char)7);
281 break;
282 case('b'):
283 *indexptr=index;
284 return Py_BuildValue("c", (char)8);
285 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000286 case('\\'):
287 *indexptr=index;
288 return Py_BuildValue("c", '\\');
289 break;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000290
Guido van Rossum50700601997-12-08 17:15:20 +0000291 case('x'):
292 {
293 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000294
Guido van Rossum50700601997-12-08 17:15:20 +0000295 x = 0; end = index;
296 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
297 {
298 ch = pattern[end];
299 x = x * 16 + pcre_lcc[ch] -
300 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
301 x &= 255;
302 end++;
303 }
304 if (end==index)
305 {
306 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
307 return NULL;
308 }
309 *indexptr = end;
310 return Py_BuildValue("c", (char)x);
311 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000312 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000313
314 case('E'): case('G'): case('L'): case('Q'):
315 case('U'): case('l'): case('u'):
316 {
317 char message[50];
318 sprintf(message, "\\%c is not allowed", c);
319 PyErr_SetString(ErrorObject, message);
320 return NULL;
321 }
322
323 case('g'):
324 {
325 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000326 int group_num = 0, is_number=0;
327
Guido van Rossum50700601997-12-08 17:15:20 +0000328 if (pattern_len<=index)
329 {
330 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
331 return NULL;
332 }
333 if (pattern[index]!='<')
334 {
335 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
336 return NULL;
337 }
338 index++;
339 end=index;
340 while (end<pattern_len && pattern[end]!='>')
341 end++;
342 if (end==pattern_len)
343 {
344 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
345 return NULL;
346 }
347
348 if (index==end) /* Zero-length name */
349 {
350 /* XXX should include the text of the reference */
351 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
352 return NULL;
353 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000354 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000355 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000356 is_number = 1;
357 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000358 }
359
360 for(i=index+1; i<end; i++)
361 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000362 if (is_number &&
363 !(pcre_ctypes[pattern[i]] & ctype_digit) )
364 {
365 /* XXX should include the text of the reference */
366 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
367 return NULL;
368 }
369 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000370 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
371 {
372 /* XXX should include the text of the reference */
373 PyErr_SetString(ErrorObject, "illegal symbolic reference");
374 return NULL;
375 }
376 }
377
378 *typeptr = MEMORY_REFERENCE;
379 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000380 /* If it's a number, return the integer value of the group */
381 if (is_number) return Py_BuildValue("i", group_num);
382 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000383 return Py_BuildValue("s#", pattern+index, end-index);
384 }
385 break;
386
387 case('0'):
388 {
389 /* \0 always indicates an octal escape, so we consume up to 3
390 characters, as long as they're all octal digits */
391 int octval=0, i;
392 index--;
393 for(i=index;
394 i<=index+2 && i<pattern_len
395 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
396 i++)
397 {
398 octval = octval * 8 + pattern[i] - '0';
399 }
400 if (octval>255)
401 {
402 PyErr_SetString(ErrorObject, "octal value out of range");
403 return NULL;
404 }
405 *indexptr = i;
406 return Py_BuildValue("c", (unsigned char)octval);
407 }
408 break;
409 case('1'): case('2'): case('3'): case('4'):
410 case('5'): case('6'): case('7'): case('8'):
411 case('9'):
412 {
413 /* Handle \?, where ? is from 1 through 9 */
414 int value=0;
415 index--;
416 /* If it's at least a two-digit reference, like \34, it might
417 either be a 3-digit octal escape (\123) or a 2-digit
418 decimal memory reference (\34) */
419
420 if ( (index+1) <pattern_len &&
421 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
422 {
423 if ( (index+2) <pattern_len &&
424 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
425 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
426 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
427 )
428 {
429 /* 3 octal digits */
430 value= 8*8*(pattern[index ]-'0') +
431 8*(pattern[index+1]-'0') +
432 (pattern[index+2]-'0');
433 if (value>255)
434 {
435 PyErr_SetString(ErrorObject, "octal value out of range");
436 return NULL;
437 }
438 *indexptr = index+3;
439 return Py_BuildValue("c", (unsigned char)value);
440 }
441 else
442 {
443 /* 2-digit form, so it's a memory reference */
444 value= 10*(pattern[index ]-'0') +
445 (pattern[index+1]-'0');
446 if (value<1 || EXTRACT_MAX<=value)
447 {
448 PyErr_SetString(ErrorObject, "memory reference out of range");
449 return NULL;
450 }
451 *typeptr = MEMORY_REFERENCE;
452 *indexptr = index+2;
453 return Py_BuildValue("i", value);
454 }
455 }
456 else
457 {
458 /* Single-digit form, like \2, so it's a memory reference */
459 *typeptr = MEMORY_REFERENCE;
460 *indexptr = index+1;
461 return Py_BuildValue("i", pattern[index]-'0');
462 }
463 }
464 break;
465
466 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000467 /* It's some unknown escape like \s, so return a string containing
468 \s */
469 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000470 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000471 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000472 break;
473 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000474}
475
476static PyObject *
477PyPcre_expand(self, args)
478 PyObject *self;
479 PyObject *args;
480{
Guido van Rossum50700601997-12-08 17:15:20 +0000481 PyObject *results, *match_obj;
482 PyObject *repl_obj, *newstring;
483 unsigned char *repl;
484 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000485
Guido van Rossum50700601997-12-08 17:15:20 +0000486 if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj))
487 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000488
Guido van Rossum50700601997-12-08 17:15:20 +0000489 repl=(unsigned char *)PyString_AsString(repl_obj);
490 size=PyString_Size(repl_obj);
491 results=PyList_New(0);
492 if (results==NULL) return NULL;
493 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000494 {
Guido van Rossum50700601997-12-08 17:15:20 +0000495 if (repl[i]=='\\')
496 {
497 PyObject *value;
498 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000499
Guido van Rossum50700601997-12-08 17:15:20 +0000500 if (start!=i)
501 {
502 PyList_Append(results,
503 PyString_FromStringAndSize((char *)repl+start, i-start));
504 total_len += i-start;
505 }
506 i++;
507 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
508 if (value==NULL)
509 {
510 /* PyPcre_expand_escape triggered an exception of some sort,
511 so just return */
512 Py_DECREF(results);
513 return NULL;
514 }
515 switch (escape_type)
516 {
517 case (CHAR):
518 PyList_Append(results, value);
519 total_len += PyString_Size(value);
520 break;
521 case(MEMORY_REFERENCE):
522 {
523 PyObject *r, *tuple, *result;
524 r=PyObject_GetAttrString(match_obj, "group");
525 tuple=PyTuple_New(1);
526 Py_INCREF(value);
527 PyTuple_SetItem(tuple, 0, value);
528 result=PyEval_CallObject(r, tuple);
529 Py_DECREF(r); Py_DECREF(tuple);
530 if (result==NULL)
531 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000532 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000533 Py_DECREF(results);
534 Py_DECREF(value);
535 return NULL;
536 }
537 if (result==Py_None)
538 {
539 char message[50];
540 sprintf(message,
541 "group did not contribute to the match");
542 PyErr_SetString(ErrorObject,
543 message);
544 Py_DECREF(result);
545 Py_DECREF(value);
546 Py_DECREF(results);
547 return NULL;
548 }
549 /* typecheck that it's a string! */
550 if (!PyString_Check(result))
551 {
552 Py_DECREF(results);
553 Py_DECREF(result);
554 PyErr_SetString(ErrorObject,
555 "group() must return a string value for replacement");
556 return NULL;
557 }
558 PyList_Append(results, result);
559 total_len += PyString_Size(result);
560 Py_DECREF(result);
561 }
562 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000563 case(STRING):
564 {
565 PyList_Append(results, value);
566 total_len += PyString_Size(value);
567 break;
568 }
Guido van Rossum50700601997-12-08 17:15:20 +0000569 default:
570 Py_DECREF(results);
571 PyErr_SetString(ErrorObject,
572 "bad escape in replacement");
573 return NULL;
574 }
575 Py_DECREF(value);
576 start=i;
577 i--; /* Decrement now, because the 'for' loop will increment it */
578 }
579 } /* endif repl[i]!='\\' */
580
581 if (start!=i)
582 {
583 PyList_Append(results, PyString_FromStringAndSize((char *)repl+start, i-start));
584 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000585 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000586
Guido van Rossum50700601997-12-08 17:15:20 +0000587 /* Whew! Now we've constructed a list containing various pieces of
588 strings that will make up our final result. So, iterate over
589 the list concatenating them. A new string measuring total_len
590 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000591
Guido van Rossum50700601997-12-08 17:15:20 +0000592 newstring=PyString_FromStringAndSize(NULL, total_len);
593 if (newstring==NULL)
594 {
595 Py_DECREF(results);
596 return NULL;
597 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000598
Guido van Rossum50700601997-12-08 17:15:20 +0000599 repl=(unsigned char *)PyString_AsString(newstring);
600 for (pos=i=0; i<PyList_Size(results); i++)
601 {
602 PyObject *item=PyList_GetItem(results, i);
603 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
604 pos += PyString_Size(item);
605 }
606 Py_DECREF(results);
607 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000608}
609
610
/* List of functions defined in the module */
612
613static PyMethodDef pcre_methods[] = {
614 {"pcre_compile", PyPcre_compile, 1},
615 {"pcre_expand", PyPcre_expand, 1},
616 {NULL, NULL} /* sentinel */
617};
618
619
/*
 * Convenience routine to export an integer value.
 * For simplicity, errors (which are unlikely anyway) are ignored.
 */
624
625static void
626insint(d, name, value)
627 PyObject * d;
628 char * name;
629 int value;
630{
631 PyObject *v = PyInt_FromLong((long) value);
632 if (v == NULL) {
633 /* Don't bother reporting this error */
634 PyErr_Clear();
635 }
636 else {
637 PyDict_SetItemString(d, name, v);
638 Py_DECREF(v);
639 }
640}
641
642
/* Initialization function for the module (*must* be called initpcre) */
644
645void
646initpcre()
647{
648 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000649
650 /* Create the module and add the functions */
651 m = Py_InitModule("pcre", pcre_methods);
652
653 /* Add some symbolic constants to the module */
654 d = PyModule_GetDict(m);
655 ErrorObject = PyString_FromString("pcre.error");
656 PyDict_SetItemString(d, "error", ErrorObject);
657
658 /* Insert the flags */
659 insint(d, "IGNORECASE", PCRE_CASELESS);
660 insint(d, "ANCHORED", PCRE_ANCHORED);
661 insint(d, "MULTILINE", PCRE_MULTILINE);
662 insint(d, "DOTALL", PCRE_DOTALL);
663 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000664 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000665
666 /* Check for errors */
667 if (PyErr_Occurred())
668 Py_FatalError("can't initialize module pcre");
669}
670