blob: fa986964887881d8f344f145266ed5d236c7c7b2 [file] [log] [blame]
/***********************************************************
Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

                        All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.

STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

******************************************************************/
31
/* Pcre objects */
33
34#include "Python.h"
35
Guido van Rossum58132c61997-12-17 00:24:13 +000036#include <assert.h>
Guido van Rossum51b3aa31997-10-06 14:43:11 +000037#ifndef Py_eval_input
38/* For Python 1.4, graminit.h has to be explicitly included */
39#include "graminit.h"
40#define Py_eval_input eval_input
41#endif
42
43#ifndef FOR_PYTHON
44#define FOR_PYTHON
45#endif
46
47#include "pcre.h"
Guido van Rossum58132c61997-12-17 00:24:13 +000048#include "pcre-int.h"
Guido van Rossum51b3aa31997-10-06 14:43:11 +000049
50static PyObject *ErrorObject;
51
52typedef struct {
53 PyObject_HEAD
54 pcre *regex;
55 pcre_extra *regex_extra;
56 int num_groups;
57} PcreObject;
58
59staticforward PyTypeObject Pcre_Type;
60
61#define PcreObject_Check(v) ((v)->ob_type == &Pcre_Type)
62#define NORMAL 0
63#define CHARCLASS 1
64#define REPLACEMENT 2
65
66#define CHAR 0
67#define MEMORY_REFERENCE 1
68#define SYNTAX 2
69#define NOT_SYNTAX 3
70#define SET 4
71#define WORD_BOUNDARY 5
72#define NOT_WORD_BOUNDARY 6
73#define BEGINNING_OF_BUFFER 7
74#define END_OF_BUFFER 8
Guido van Rossum042ff9e1998-04-03 21:13:31 +000075#define STRING 9
Guido van Rossum51b3aa31997-10-06 14:43:11 +000076
77static PcreObject *
78newPcreObject(arg)
79 PyObject *arg;
80{
81 PcreObject *self;
82 self = PyObject_NEW(PcreObject, &Pcre_Type);
83 if (self == NULL)
84 return NULL;
85 self->regex = NULL;
86 self->regex_extra = NULL;
87 return self;
88}
89
/* Pcre methods */
91
92static void
93PyPcre_dealloc(self)
94 PcreObject *self;
95{
96 if (self->regex) free(self->regex);
97 if (self->regex_extra) free(self->regex_extra);
98 self->regex=NULL;
99 self->regex_extra=NULL;
100 PyMem_DEL(self);
101}
102
103
104static PyObject *
105PyPcre_exec(self, args)
106 PcreObject *self;
107 PyObject *args;
108{
Guido van Rossum50700601997-12-08 17:15:20 +0000109 char *string;
110 int stringlen, pos = 0, options=0, endpos = -1, i, count;
111 int offsets[100*2];
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000112 PyObject *list;
113
Guido van Rossum7e488981998-10-08 02:25:24 +0000114 if (!PyArg_ParseTuple(args, "t#|iiii", &string, &stringlen, &pos, &endpos, &options))
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000115 return NULL;
Guido van Rossum50700601997-12-08 17:15:20 +0000116 if (endpos == -1) {endpos = stringlen;}
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000117 count = pcre_exec(self->regex, self->regex_extra,
Guido van Rossum7e488981998-10-08 02:25:24 +0000118 string, endpos, pos, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000119 offsets, sizeof(offsets)/sizeof(int) );
Guido van Rossum50700601997-12-08 17:15:20 +0000120 /* If an error occurred during the match, and an exception was raised,
121 just return NULL and leave the exception alone. The most likely
122 problem to cause this would be running out of memory for
123 the failure stack. */
124 if (PyErr_Occurred())
125 {
126 return NULL;
127 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000128 if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
129 if (count<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000130 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000131 PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
132 PyErr_SetObject(ErrorObject, errval);
133 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000134 return NULL;
135 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000136
137 list=PyList_New(self->num_groups+1);
138 if (list==NULL) return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000139 for(i=0; i<=self->num_groups; i++)
Guido van Rossum50700601997-12-08 17:15:20 +0000140 {
141 PyObject *v;
142 int start=offsets[i*2], end=offsets[i*2+1];
143 /* If the group wasn't affected by the match, return -1, -1 */
144 if (start<0 || count<=i)
145 {start=end=-1;}
Guido van Rossum50700601997-12-08 17:15:20 +0000146 v=Py_BuildValue("ii", start, end);
147 if (v==NULL) {Py_DECREF(list); return NULL;}
148 PyList_SetItem(list, i, v);
149 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000150 return list;
151}
152
153static PyMethodDef Pcre_methods[] = {
154 {"match", (PyCFunction)PyPcre_exec, 1},
155 {NULL, NULL} /* sentinel */
156};
157
158static PyObject *
159PyPcre_getattr(self, name)
160 PcreObject *self;
161 char *name;
162{
163 return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
164}
165
166
167staticforward PyTypeObject Pcre_Type = {
168 PyObject_HEAD_INIT(&PyType_Type)
169 0, /*ob_size*/
170 "Pcre", /*tp_name*/
171 sizeof(PcreObject), /*tp_basicsize*/
172 0, /*tp_itemsize*/
173 /* methods */
174 (destructor)PyPcre_dealloc, /*tp_dealloc*/
175 0, /*tp_print*/
Guido van Rossumcb4d3031997-10-20 23:21:23 +0000176 (getattrfunc)PyPcre_getattr, /*tp_getattr*/
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000177 0, /*tp_setattr*/
178 0, /*tp_compare*/
179 0, /*tp_repr*/
180 0, /*tp_as_number*/
181 0, /*tp_as_sequence*/
182 0, /*tp_as_mapping*/
183 0, /*tp_hash*/
184};
/* --------------------------------------------------------------------- */
186
187static PyObject *
188PyPcre_compile(self, args)
189 PyObject *self; /* Not used */
190 PyObject *args;
191{
192 PcreObject *rv;
193 PyObject *dictionary;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000194 char *pattern;
Guido van Rossum58132c61997-12-17 00:24:13 +0000195 const char *error;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000196
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000197 int options, erroroffset;
198 if (!PyArg_ParseTuple(args, "siO!", &pattern, &options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000199 &PyDict_Type, &dictionary))
200 return NULL;
201 rv = newPcreObject(args);
202 if ( rv == NULL )
Guido van Rossum50700601997-12-08 17:15:20 +0000203 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000204
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000205 rv->regex = pcre_compile((char*)pattern, options,
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000206 &error, &erroroffset, dictionary);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000207 if (rv->regex==NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000208 {
209 PyMem_DEL(rv);
210 if (!PyErr_Occurred())
211 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000212 PyObject *errval = Py_BuildValue("si", error, erroroffset);
213 PyErr_SetObject(ErrorObject, errval);
214 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000215 }
216 return NULL;
217 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000218 rv->regex_extra=pcre_study(rv->regex, 0, &error);
219 if (rv->regex_extra==NULL && error!=NULL)
Guido van Rossum50700601997-12-08 17:15:20 +0000220 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000221 PyObject *errval = Py_BuildValue("si", error, 0);
Guido van Rossum50700601997-12-08 17:15:20 +0000222 PyMem_DEL(rv);
Guido van Rossum58132c61997-12-17 00:24:13 +0000223 PyErr_SetObject(ErrorObject, errval);
224 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000225 return NULL;
226 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000227 rv->num_groups = pcre_info(rv->regex, NULL, NULL);
228 if (rv->num_groups<0)
Guido van Rossum50700601997-12-08 17:15:20 +0000229 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000230 PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
231 PyErr_SetObject(ErrorObject, errval);
232 Py_XDECREF(errval);
Guido van Rossum50700601997-12-08 17:15:20 +0000233 PyMem_DEL(rv);
234 return NULL;
235 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000236 return (PyObject *)rv;
237}
238
239static PyObject *
Guido van Rossumc3861071997-10-08 02:07:40 +0000240PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
Guido van Rossum50700601997-12-08 17:15:20 +0000241 unsigned char *pattern;
242 int pattern_len, *indexptr, *typeptr;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000243{
Guido van Rossum50700601997-12-08 17:15:20 +0000244 unsigned char c;
245 int index = *indexptr;
Guido van Rossumc3861071997-10-08 02:07:40 +0000246
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000247 if (pattern_len<=index)
Guido van Rossum50700601997-12-08 17:15:20 +0000248 {
249 PyErr_SetString(ErrorObject, "escape ends too soon");
250 return NULL;
251 }
252 c=pattern[index]; index++;
253 *typeptr=CHAR;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000254
Guido van Rossum50700601997-12-08 17:15:20 +0000255 switch (c)
256 {
257 case('t'):
258 *indexptr=index;
259 return Py_BuildValue("c", (char)9);
Guido van Rossum50700601997-12-08 17:15:20 +0000260 case('n'):
261 *indexptr = index;
262 return Py_BuildValue("c", (char)10);
Guido van Rossum50700601997-12-08 17:15:20 +0000263 case('v'):
264 *indexptr = index;
265 return Py_BuildValue("c", (char)11);
Guido van Rossum50700601997-12-08 17:15:20 +0000266 case('r'):
267 *indexptr = index;
268 return Py_BuildValue("c", (char)13);
Guido van Rossum50700601997-12-08 17:15:20 +0000269 case('f'):
270 *indexptr = index;
271 return Py_BuildValue("c", (char)12);
Guido van Rossum50700601997-12-08 17:15:20 +0000272 case('a'):
273 *indexptr = index;
274 return Py_BuildValue("c", (char)7);
Guido van Rossum50700601997-12-08 17:15:20 +0000275 case('b'):
276 *indexptr=index;
277 return Py_BuildValue("c", (char)8);
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000278 case('\\'):
279 *indexptr=index;
280 return Py_BuildValue("c", '\\');
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000281
Guido van Rossum50700601997-12-08 17:15:20 +0000282 case('x'):
283 {
284 int x, ch, end;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000285
Guido van Rossum50700601997-12-08 17:15:20 +0000286 x = 0; end = index;
287 while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
288 {
289 ch = pattern[end];
290 x = x * 16 + pcre_lcc[ch] -
291 (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
292 x &= 255;
293 end++;
294 }
295 if (end==index)
296 {
297 PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
298 return NULL;
299 }
300 *indexptr = end;
301 return Py_BuildValue("c", (char)x);
302 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000303 break;
Guido van Rossum50700601997-12-08 17:15:20 +0000304
305 case('E'): case('G'): case('L'): case('Q'):
306 case('U'): case('l'): case('u'):
307 {
308 char message[50];
309 sprintf(message, "\\%c is not allowed", c);
310 PyErr_SetString(ErrorObject, message);
311 return NULL;
312 }
313
314 case('g'):
315 {
316 int end, i;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000317 int group_num = 0, is_number=0;
318
Guido van Rossum50700601997-12-08 17:15:20 +0000319 if (pattern_len<=index)
320 {
321 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
322 return NULL;
323 }
324 if (pattern[index]!='<')
325 {
326 PyErr_SetString(ErrorObject, "missing < in symbolic reference");
327 return NULL;
328 }
329 index++;
330 end=index;
331 while (end<pattern_len && pattern[end]!='>')
332 end++;
333 if (end==pattern_len)
334 {
335 PyErr_SetString(ErrorObject, "unfinished symbolic reference");
336 return NULL;
337 }
338
339 if (index==end) /* Zero-length name */
340 {
341 /* XXX should include the text of the reference */
342 PyErr_SetString(ErrorObject, "zero-length symbolic reference");
343 return NULL;
344 }
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000345 if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
Guido van Rossum50700601997-12-08 17:15:20 +0000346 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000347 is_number = 1;
348 group_num = pattern[index] - '0';
Guido van Rossum50700601997-12-08 17:15:20 +0000349 }
350
351 for(i=index+1; i<end; i++)
352 {
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000353 if (is_number &&
354 !(pcre_ctypes[pattern[i]] & ctype_digit) )
355 {
356 /* XXX should include the text of the reference */
357 PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
358 return NULL;
359 }
360 else {group_num = group_num * 10 + pattern[i] - '0';}
Guido van Rossum50700601997-12-08 17:15:20 +0000361 if (!(pcre_ctypes[pattern[i]] & ctype_word) )
362 {
363 /* XXX should include the text of the reference */
364 PyErr_SetString(ErrorObject, "illegal symbolic reference");
365 return NULL;
366 }
367 }
368
369 *typeptr = MEMORY_REFERENCE;
370 *indexptr = end+1;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000371 /* If it's a number, return the integer value of the group */
372 if (is_number) return Py_BuildValue("i", group_num);
373 /* Otherwise, return a string containing the group name */
Guido van Rossum50700601997-12-08 17:15:20 +0000374 return Py_BuildValue("s#", pattern+index, end-index);
375 }
376 break;
377
378 case('0'):
379 {
380 /* \0 always indicates an octal escape, so we consume up to 3
381 characters, as long as they're all octal digits */
382 int octval=0, i;
383 index--;
384 for(i=index;
385 i<=index+2 && i<pattern_len
386 && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
387 i++)
388 {
389 octval = octval * 8 + pattern[i] - '0';
390 }
391 if (octval>255)
392 {
393 PyErr_SetString(ErrorObject, "octal value out of range");
394 return NULL;
395 }
396 *indexptr = i;
397 return Py_BuildValue("c", (unsigned char)octval);
398 }
399 break;
400 case('1'): case('2'): case('3'): case('4'):
401 case('5'): case('6'): case('7'): case('8'):
402 case('9'):
403 {
404 /* Handle \?, where ? is from 1 through 9 */
405 int value=0;
406 index--;
407 /* If it's at least a two-digit reference, like \34, it might
408 either be a 3-digit octal escape (\123) or a 2-digit
409 decimal memory reference (\34) */
410
411 if ( (index+1) <pattern_len &&
412 (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
413 {
414 if ( (index+2) <pattern_len &&
415 (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
416 (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
417 (pcre_ctypes[ pattern[index ] ] & ctype_odigit)
418 )
419 {
420 /* 3 octal digits */
421 value= 8*8*(pattern[index ]-'0') +
422 8*(pattern[index+1]-'0') +
423 (pattern[index+2]-'0');
424 if (value>255)
425 {
426 PyErr_SetString(ErrorObject, "octal value out of range");
427 return NULL;
428 }
429 *indexptr = index+3;
430 return Py_BuildValue("c", (unsigned char)value);
431 }
432 else
433 {
434 /* 2-digit form, so it's a memory reference */
435 value= 10*(pattern[index ]-'0') +
436 (pattern[index+1]-'0');
437 if (value<1 || EXTRACT_MAX<=value)
438 {
439 PyErr_SetString(ErrorObject, "memory reference out of range");
440 return NULL;
441 }
442 *typeptr = MEMORY_REFERENCE;
443 *indexptr = index+2;
444 return Py_BuildValue("i", value);
445 }
446 }
447 else
448 {
449 /* Single-digit form, like \2, so it's a memory reference */
450 *typeptr = MEMORY_REFERENCE;
451 *indexptr = index+1;
452 return Py_BuildValue("i", pattern[index]-'0');
453 }
454 }
455 break;
456
457 default:
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000458 /* It's some unknown escape like \s, so return a string containing
459 \s */
460 *typeptr = STRING;
Guido van Rossum50700601997-12-08 17:15:20 +0000461 *indexptr = index;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000462 return Py_BuildValue("s#", pattern+index-2, 2);
Guido van Rossum50700601997-12-08 17:15:20 +0000463 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000464}
465
466static PyObject *
467PyPcre_expand(self, args)
468 PyObject *self;
469 PyObject *args;
470{
Guido van Rossum50700601997-12-08 17:15:20 +0000471 PyObject *results, *match_obj;
472 PyObject *repl_obj, *newstring;
473 unsigned char *repl;
474 int size, total_len, i, start, pos;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000475
Guido van Rossum50700601997-12-08 17:15:20 +0000476 if (!PyArg_ParseTuple(args, "OS", &match_obj, &repl_obj))
477 return NULL;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000478
Guido van Rossum50700601997-12-08 17:15:20 +0000479 repl=(unsigned char *)PyString_AsString(repl_obj);
480 size=PyString_Size(repl_obj);
481 results=PyList_New(0);
482 if (results==NULL) return NULL;
483 for(start=total_len=i=0; i<size; i++)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000484 {
Guido van Rossum50700601997-12-08 17:15:20 +0000485 if (repl[i]=='\\')
486 {
487 PyObject *value;
488 int escape_type;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000489
Guido van Rossum50700601997-12-08 17:15:20 +0000490 if (start!=i)
491 {
492 PyList_Append(results,
493 PyString_FromStringAndSize((char *)repl+start, i-start));
494 total_len += i-start;
495 }
496 i++;
497 value=PyPcre_expand_escape(repl, size, &i, &escape_type);
498 if (value==NULL)
499 {
500 /* PyPcre_expand_escape triggered an exception of some sort,
501 so just return */
502 Py_DECREF(results);
503 return NULL;
504 }
505 switch (escape_type)
506 {
507 case (CHAR):
508 PyList_Append(results, value);
509 total_len += PyString_Size(value);
510 break;
511 case(MEMORY_REFERENCE):
512 {
513 PyObject *r, *tuple, *result;
514 r=PyObject_GetAttrString(match_obj, "group");
Guido van Rossum1a785531998-07-17 20:19:48 +0000515 if (r == NULL) {
516 Py_DECREF(results);
517 return NULL;
518 }
Guido van Rossum50700601997-12-08 17:15:20 +0000519 tuple=PyTuple_New(1);
520 Py_INCREF(value);
521 PyTuple_SetItem(tuple, 0, value);
522 result=PyEval_CallObject(r, tuple);
523 Py_DECREF(r); Py_DECREF(tuple);
524 if (result==NULL)
525 {
Guido van Rossum58132c61997-12-17 00:24:13 +0000526 /* The group() method triggered an exception of some sort */
Guido van Rossum50700601997-12-08 17:15:20 +0000527 Py_DECREF(results);
528 Py_DECREF(value);
529 return NULL;
530 }
531 if (result==Py_None)
532 {
533 char message[50];
534 sprintf(message,
535 "group did not contribute to the match");
536 PyErr_SetString(ErrorObject,
537 message);
538 Py_DECREF(result);
539 Py_DECREF(value);
540 Py_DECREF(results);
541 return NULL;
542 }
543 /* typecheck that it's a string! */
544 if (!PyString_Check(result))
545 {
546 Py_DECREF(results);
547 Py_DECREF(result);
548 PyErr_SetString(ErrorObject,
549 "group() must return a string value for replacement");
550 return NULL;
551 }
552 PyList_Append(results, result);
553 total_len += PyString_Size(result);
554 Py_DECREF(result);
555 }
556 break;
Guido van Rossum042ff9e1998-04-03 21:13:31 +0000557 case(STRING):
558 {
559 PyList_Append(results, value);
560 total_len += PyString_Size(value);
561 break;
562 }
Guido van Rossum50700601997-12-08 17:15:20 +0000563 default:
564 Py_DECREF(results);
565 PyErr_SetString(ErrorObject,
566 "bad escape in replacement");
567 return NULL;
568 }
569 Py_DECREF(value);
570 start=i;
571 i--; /* Decrement now, because the 'for' loop will increment it */
572 }
573 } /* endif repl[i]!='\\' */
574
575 if (start!=i)
576 {
577 PyList_Append(results, PyString_FromStringAndSize((char *)repl+start, i-start));
578 total_len += i-start;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000579 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000580
Guido van Rossum50700601997-12-08 17:15:20 +0000581 /* Whew! Now we've constructed a list containing various pieces of
582 strings that will make up our final result. So, iterate over
583 the list concatenating them. A new string measuring total_len
584 bytes is allocated and filled in. */
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000585
Guido van Rossum50700601997-12-08 17:15:20 +0000586 newstring=PyString_FromStringAndSize(NULL, total_len);
587 if (newstring==NULL)
588 {
589 Py_DECREF(results);
590 return NULL;
591 }
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000592
Guido van Rossum50700601997-12-08 17:15:20 +0000593 repl=(unsigned char *)PyString_AsString(newstring);
594 for (pos=i=0; i<PyList_Size(results); i++)
595 {
596 PyObject *item=PyList_GetItem(results, i);
597 memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
598 pos += PyString_Size(item);
599 }
600 Py_DECREF(results);
601 return newstring;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000602}
603
604
605/* List of functions defined in the module */
606
607static PyMethodDef pcre_methods[] = {
608 {"pcre_compile", PyPcre_compile, 1},
609 {"pcre_expand", PyPcre_expand, 1},
610 {NULL, NULL} /* sentinel */
611};
612
613
614/*
615 * Convenience routine to export an integer value.
616 * For simplicity, errors (which are unlikely anyway) are ignored.
617 */
618
619static void
620insint(d, name, value)
621 PyObject * d;
622 char * name;
623 int value;
624{
625 PyObject *v = PyInt_FromLong((long) value);
626 if (v == NULL) {
627 /* Don't bother reporting this error */
628 PyErr_Clear();
629 }
630 else {
631 PyDict_SetItemString(d, name, v);
632 Py_DECREF(v);
633 }
634}
635
636
637/* Initialization function for the module (*must* be called initpcre) */
638
Guido van Rossum3886bb61998-12-04 18:50:17 +0000639DL_EXPORT(void)
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000640initpcre()
641{
642 PyObject *m, *d;
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000643
644 /* Create the module and add the functions */
645 m = Py_InitModule("pcre", pcre_methods);
646
647 /* Add some symbolic constants to the module */
648 d = PyModule_GetDict(m);
649 ErrorObject = PyString_FromString("pcre.error");
650 PyDict_SetItemString(d, "error", ErrorObject);
651
652 /* Insert the flags */
653 insint(d, "IGNORECASE", PCRE_CASELESS);
654 insint(d, "ANCHORED", PCRE_ANCHORED);
655 insint(d, "MULTILINE", PCRE_MULTILINE);
656 insint(d, "DOTALL", PCRE_DOTALL);
657 insint(d, "VERBOSE", PCRE_EXTENDED);
Guido van Rossum50700601997-12-08 17:15:20 +0000658 insint(d, "LOCALE", PCRE_LOCALE);
Guido van Rossum51b3aa31997-10-06 14:43:11 +0000659
660 /* Check for errors */
661 if (PyErr_Occurred())
662 Py_FatalError("can't initialize module pcre");
663}
664