/***********************************************************
Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.

All Rights Reserved

Copyright (c) 2000, BeOpen.com.
Copyright (c) 1995-2000, Corporation for National Research Initiatives.
Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
All rights reserved.

See the file "Misc/COPYRIGHT" for information on usage and
redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.

******************************************************************/
|  | 16 |  | 
|  | 17 | /* Pcre objects */ | 
|  | 18 |  | 
|  | 19 | #include "Python.h" | 
|  | 20 |  | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 21 | #include <assert.h> | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 22 | #ifndef Py_eval_input | 
|  | 23 | /* For Python 1.4, graminit.h has to be explicitly included */ | 
|  | 24 | #include "graminit.h" | 
|  | 25 | #define Py_eval_input eval_input | 
|  | 26 | #endif | 
|  | 27 |  | 
|  | 28 | #ifndef FOR_PYTHON | 
|  | 29 | #define FOR_PYTHON | 
|  | 30 | #endif | 
|  | 31 |  | 
|  | 32 | #include "pcre.h" | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 33 | #include "pcre-int.h" | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 34 |  | 
|  | 35 | static PyObject *ErrorObject; | 
|  | 36 |  | 
|  | 37 | typedef struct { | 
|  | 38 | PyObject_HEAD | 
|  | 39 | pcre *regex; | 
|  | 40 | pcre_extra *regex_extra; | 
|  | 41 | int num_groups; | 
|  | 42 | } PcreObject; | 
|  | 43 |  | 
|  | 44 | staticforward PyTypeObject Pcre_Type; | 
|  | 45 |  | 
|  | 46 | #define PcreObject_Check(v)	((v)->ob_type == &Pcre_Type) | 
/* Parsing contexts.  Not referenced by the code in this file section. */
#define NORMAL			0
#define CHARCLASS		1
#define REPLACEMENT		2

/* Escape classifications.  PyPcre_expand_escape() reports CHAR,
   MEMORY_REFERENCE or STRING through its typeptr argument; the remaining
   codes are not produced by the code in this file section. */
#define CHAR			0
#define MEMORY_REFERENCE	1
#define SYNTAX			2
#define NOT_SYNTAX		3
#define SET			4
#define WORD_BOUNDARY		5
#define NOT_WORD_BOUNDARY	6
#define BEGINNING_OF_BUFFER	7
#define END_OF_BUFFER		8
#define STRING			9
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 61 |  | 
|  | 62 | static PcreObject * | 
|  | 63 | newPcreObject(arg) | 
|  | 64 | PyObject *arg; | 
|  | 65 | { | 
|  | 66 | PcreObject *self; | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 67 | self = PyObject_New(PcreObject, &Pcre_Type); | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 68 | if (self == NULL) | 
|  | 69 | return NULL; | 
|  | 70 | self->regex = NULL; | 
|  | 71 | self->regex_extra = NULL; | 
|  | 72 | return self; | 
|  | 73 | } | 
|  | 74 |  | 
|  | 75 | /* Pcre methods */ | 
|  | 76 |  | 
|  | 77 | static void | 
|  | 78 | PyPcre_dealloc(self) | 
|  | 79 | PcreObject *self; | 
|  | 80 | { | 
| Andrew M. Kuchling | 0c7822e | 2000-02-18 18:30:01 +0000 | [diff] [blame] | 81 | if (self->regex) (pcre_free)(self->regex); | 
|  | 82 | if (self->regex_extra) (pcre_free)(self->regex_extra); | 
| Guido van Rossum | b18618d | 2000-05-03 23:44:39 +0000 | [diff] [blame] | 83 | PyObject_Del(self); | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 84 | } | 
|  | 85 |  | 
|  | 86 |  | 
|  | 87 | static PyObject * | 
|  | 88 | PyPcre_exec(self, args) | 
|  | 89 | PcreObject *self; | 
|  | 90 | PyObject *args; | 
|  | 91 | { | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 92 | char *string; | 
|  | 93 | int stringlen, pos = 0, options=0, endpos = -1, i, count; | 
|  | 94 | int offsets[100*2]; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 95 | PyObject *list; | 
|  | 96 |  | 
| Guido van Rossum | 43713e5 | 2000-02-29 13:59:29 +0000 | [diff] [blame] | 97 | if (!PyArg_ParseTuple(args, "t#|iiii:match", &string, &stringlen, &pos, &endpos, &options)) | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 98 | return NULL; | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 99 | if (endpos == -1) {endpos = stringlen;} | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 100 | count = pcre_exec(self->regex, self->regex_extra, | 
| Guido van Rossum | 7e48898 | 1998-10-08 02:25:24 +0000 | [diff] [blame] | 101 | string, endpos, pos, options, | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 102 | offsets, sizeof(offsets)/sizeof(int) ); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 103 | /* If an error occurred during the match, and an exception was raised, | 
|  | 104 | just return NULL and leave the exception alone.  The most likely | 
|  | 105 | problem to cause this would be running out of memory for | 
|  | 106 | the failure stack. */ | 
|  | 107 | if (PyErr_Occurred()) | 
|  | 108 | { | 
|  | 109 | return NULL; | 
|  | 110 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 111 | if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;} | 
|  | 112 | if (count<0) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 113 | { | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 114 | PyObject *errval = Py_BuildValue("si", "Regex execution error", count); | 
|  | 115 | PyErr_SetObject(ErrorObject, errval); | 
|  | 116 | Py_XDECREF(errval); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 117 | return NULL; | 
|  | 118 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 119 |  | 
|  | 120 | list=PyList_New(self->num_groups+1); | 
|  | 121 | if (list==NULL) return NULL; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 122 | for(i=0; i<=self->num_groups; i++) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 123 | { | 
|  | 124 | PyObject *v; | 
|  | 125 | int start=offsets[i*2], end=offsets[i*2+1]; | 
|  | 126 | /* If the group wasn't affected by the match, return -1, -1 */ | 
|  | 127 | if (start<0 || count<=i) | 
|  | 128 | {start=end=-1;} | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 129 | v=Py_BuildValue("ii", start, end); | 
|  | 130 | if (v==NULL) {Py_DECREF(list); return NULL;} | 
|  | 131 | PyList_SetItem(list, i, v); | 
|  | 132 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 133 | return list; | 
|  | 134 | } | 
|  | 135 |  | 
|  | 136 | static PyMethodDef Pcre_methods[] = { | 
|  | 137 | {"match",	(PyCFunction)PyPcre_exec,	1}, | 
|  | 138 | {NULL,		NULL}		/* sentinel */ | 
|  | 139 | }; | 
|  | 140 |  | 
|  | 141 | static PyObject * | 
|  | 142 | PyPcre_getattr(self, name) | 
|  | 143 | PcreObject *self; | 
|  | 144 | char *name; | 
|  | 145 | { | 
|  | 146 | return Py_FindMethod(Pcre_methods, (PyObject *)self, name); | 
|  | 147 | } | 
|  | 148 |  | 
|  | 149 |  | 
|  | 150 | staticforward PyTypeObject Pcre_Type = { | 
| Fred Drake | 0d40ba4 | 2000-02-04 20:33:49 +0000 | [diff] [blame] | 151 | PyObject_HEAD_INIT(NULL) | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 152 | 0,			/*ob_size*/ | 
|  | 153 | "Pcre",			/*tp_name*/ | 
|  | 154 | sizeof(PcreObject),	/*tp_basicsize*/ | 
|  | 155 | 0,			/*tp_itemsize*/ | 
|  | 156 | /* methods */ | 
|  | 157 | (destructor)PyPcre_dealloc, /*tp_dealloc*/ | 
|  | 158 | 0,			/*tp_print*/ | 
| Guido van Rossum | cb4d303 | 1997-10-20 23:21:23 +0000 | [diff] [blame] | 159 | (getattrfunc)PyPcre_getattr, /*tp_getattr*/ | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 160 | 0,                      /*tp_setattr*/ | 
|  | 161 | 0,			/*tp_compare*/ | 
|  | 162 | 0,			/*tp_repr*/ | 
|  | 163 | 0,			/*tp_as_number*/ | 
|  | 164 | 0,			/*tp_as_sequence*/ | 
|  | 165 | 0,			/*tp_as_mapping*/ | 
|  | 166 | 0,			/*tp_hash*/ | 
|  | 167 | }; | 
|  | 168 | /* --------------------------------------------------------------------- */ | 
|  | 169 |  | 
|  | 170 | static PyObject * | 
|  | 171 | PyPcre_compile(self, args) | 
|  | 172 | PyObject *self; /* Not used */ | 
|  | 173 | PyObject *args; | 
|  | 174 | { | 
|  | 175 | PcreObject *rv; | 
|  | 176 | PyObject *dictionary; | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 177 | char *pattern; | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 178 | const char *error; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 179 |  | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 180 | int options, erroroffset; | 
| Guido van Rossum | 43713e5 | 2000-02-29 13:59:29 +0000 | [diff] [blame] | 181 | if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options, | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 182 | &PyDict_Type, &dictionary)) | 
|  | 183 | return NULL; | 
|  | 184 | rv = newPcreObject(args); | 
|  | 185 | if ( rv == NULL ) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 186 | return NULL; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 187 |  | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 188 | rv->regex = pcre_compile((char*)pattern, options, | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 189 | &error, &erroroffset, dictionary); | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 190 | if (rv->regex==NULL) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 191 | { | 
| Guido van Rossum | c4428c5 | 2000-04-25 15:59:32 +0000 | [diff] [blame] | 192 | Py_DECREF(rv); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 193 | if (!PyErr_Occurred()) | 
|  | 194 | { | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 195 | PyObject *errval = Py_BuildValue("si", error, erroroffset); | 
|  | 196 | PyErr_SetObject(ErrorObject, errval); | 
|  | 197 | Py_XDECREF(errval); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 198 | } | 
|  | 199 | return NULL; | 
|  | 200 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 201 | rv->regex_extra=pcre_study(rv->regex, 0, &error); | 
|  | 202 | if (rv->regex_extra==NULL && error!=NULL) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 203 | { | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 204 | PyObject *errval = Py_BuildValue("si", error, 0); | 
| Guido van Rossum | c4428c5 | 2000-04-25 15:59:32 +0000 | [diff] [blame] | 205 | Py_DECREF(rv); | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 206 | PyErr_SetObject(ErrorObject, errval); | 
|  | 207 | Py_XDECREF(errval); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 208 | return NULL; | 
|  | 209 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 210 | rv->num_groups = pcre_info(rv->regex, NULL, NULL); | 
|  | 211 | if (rv->num_groups<0) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 212 | { | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 213 | PyObject *errval = Py_BuildValue("si", error, rv->num_groups); | 
|  | 214 | PyErr_SetObject(ErrorObject, errval); | 
|  | 215 | Py_XDECREF(errval); | 
| Guido van Rossum | c4428c5 | 2000-04-25 15:59:32 +0000 | [diff] [blame] | 216 | Py_DECREF(rv); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 217 | return NULL; | 
|  | 218 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 219 | return (PyObject *)rv; | 
|  | 220 | } | 
|  | 221 |  | 
|  | 222 | static PyObject * | 
| Guido van Rossum | c386107 | 1997-10-08 02:07:40 +0000 | [diff] [blame] | 223 | PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 224 | unsigned char *pattern; | 
|  | 225 | int pattern_len, *indexptr, *typeptr; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 226 | { | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 227 | unsigned char c; | 
|  | 228 | int index = *indexptr; | 
| Guido van Rossum | c386107 | 1997-10-08 02:07:40 +0000 | [diff] [blame] | 229 |  | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 230 | if (pattern_len<=index) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 231 | { | 
|  | 232 | PyErr_SetString(ErrorObject, "escape ends too soon"); | 
|  | 233 | return NULL; | 
|  | 234 | } | 
|  | 235 | c=pattern[index]; index++; | 
|  | 236 | *typeptr=CHAR; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 237 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 238 | switch (c) | 
|  | 239 | { | 
|  | 240 | case('t'): | 
|  | 241 | *indexptr=index; | 
|  | 242 | return Py_BuildValue("c", (char)9); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 243 | case('n'): | 
|  | 244 | *indexptr = index; | 
|  | 245 | return Py_BuildValue("c", (char)10); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 246 | case('v'): | 
|  | 247 | *indexptr = index; | 
|  | 248 | return Py_BuildValue("c", (char)11); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 249 | case('r'): | 
|  | 250 | *indexptr = index; | 
|  | 251 | return Py_BuildValue("c", (char)13); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 252 | case('f'): | 
|  | 253 | *indexptr = index; | 
|  | 254 | return Py_BuildValue("c", (char)12); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 255 | case('a'): | 
|  | 256 | *indexptr = index; | 
|  | 257 | return Py_BuildValue("c", (char)7); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 258 | case('b'): | 
|  | 259 | *indexptr=index; | 
|  | 260 | return Py_BuildValue("c", (char)8); | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 261 | case('\\'): | 
|  | 262 | *indexptr=index; | 
|  | 263 | return Py_BuildValue("c", '\\'); | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 264 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 265 | case('x'): | 
|  | 266 | { | 
|  | 267 | int x, ch, end; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 268 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 269 | x = 0; end = index; | 
|  | 270 | while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0) | 
|  | 271 | { | 
|  | 272 | ch = pattern[end]; | 
|  | 273 | x = x * 16 + pcre_lcc[ch] - | 
|  | 274 | (((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W'); | 
|  | 275 | x &= 255; | 
|  | 276 | end++; | 
|  | 277 | } | 
|  | 278 | if (end==index) | 
|  | 279 | { | 
|  | 280 | PyErr_SetString(ErrorObject, "\\x must be followed by hex digits"); | 
|  | 281 | return NULL; | 
|  | 282 | } | 
|  | 283 | *indexptr = end; | 
|  | 284 | return Py_BuildValue("c", (char)x); | 
|  | 285 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 286 | break; | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 287 |  | 
|  | 288 | case('E'):    case('G'):    case('L'):    case('Q'): | 
|  | 289 | case('U'):    case('l'):    case('u'): | 
|  | 290 | { | 
|  | 291 | char message[50]; | 
|  | 292 | sprintf(message, "\\%c is not allowed", c); | 
|  | 293 | PyErr_SetString(ErrorObject, message); | 
|  | 294 | return NULL; | 
|  | 295 | } | 
|  | 296 |  | 
|  | 297 | case('g'): | 
|  | 298 | { | 
|  | 299 | int end, i; | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 300 | int group_num = 0, is_number=0; | 
|  | 301 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 302 | if (pattern_len<=index) | 
|  | 303 | { | 
|  | 304 | PyErr_SetString(ErrorObject, "unfinished symbolic reference"); | 
|  | 305 | return NULL; | 
|  | 306 | } | 
|  | 307 | if (pattern[index]!='<') | 
|  | 308 | { | 
|  | 309 | PyErr_SetString(ErrorObject, "missing < in symbolic reference"); | 
|  | 310 | return NULL; | 
|  | 311 | } | 
|  | 312 | index++; | 
|  | 313 | end=index; | 
|  | 314 | while (end<pattern_len && pattern[end]!='>') | 
|  | 315 | end++; | 
|  | 316 | if (end==pattern_len) | 
|  | 317 | { | 
|  | 318 | PyErr_SetString(ErrorObject, "unfinished symbolic reference"); | 
|  | 319 | return NULL; | 
|  | 320 | } | 
|  | 321 |  | 
|  | 322 | if (index==end)		/* Zero-length name */ | 
|  | 323 | { | 
|  | 324 | /* XXX should include the text of the reference */ | 
|  | 325 | PyErr_SetString(ErrorObject, "zero-length symbolic reference"); | 
|  | 326 | return NULL; | 
|  | 327 | } | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 328 | if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */ | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 329 | { | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 330 | is_number = 1; | 
|  | 331 | group_num = pattern[index] - '0'; | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 332 | } | 
|  | 333 |  | 
|  | 334 | for(i=index+1; i<end; i++) | 
|  | 335 | { | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 336 | if (is_number && | 
|  | 337 | !(pcre_ctypes[pattern[i]] & ctype_digit) ) | 
|  | 338 | { | 
|  | 339 | /* XXX should include the text of the reference */ | 
|  | 340 | PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit"); | 
|  | 341 | return NULL; | 
|  | 342 | } | 
|  | 343 | else {group_num = group_num * 10 + pattern[i] - '0';} | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 344 | if (!(pcre_ctypes[pattern[i]] & ctype_word) ) | 
|  | 345 | { | 
|  | 346 | /* XXX should include the text of the reference */ | 
|  | 347 | PyErr_SetString(ErrorObject, "illegal symbolic reference"); | 
|  | 348 | return NULL; | 
|  | 349 | } | 
|  | 350 | } | 
|  | 351 |  | 
|  | 352 | *typeptr = MEMORY_REFERENCE; | 
|  | 353 | *indexptr = end+1; | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 354 | /* If it's a number, return the integer value of the group */ | 
|  | 355 | if (is_number) return Py_BuildValue("i", group_num); | 
|  | 356 | /* Otherwise, return a string containing the group name */ | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 357 | return Py_BuildValue("s#", pattern+index, end-index); | 
|  | 358 | } | 
|  | 359 | break; | 
|  | 360 |  | 
|  | 361 | case('0'): | 
|  | 362 | { | 
|  | 363 | /* \0 always indicates an octal escape, so we consume up to 3 | 
|  | 364 | characters, as long as they're all octal digits */ | 
|  | 365 | int octval=0, i; | 
|  | 366 | index--; | 
|  | 367 | for(i=index; | 
|  | 368 | i<=index+2 && i<pattern_len | 
|  | 369 | && (pcre_ctypes[ pattern[i] ] & ctype_odigit ); | 
|  | 370 | i++) | 
|  | 371 | { | 
|  | 372 | octval = octval * 8 + pattern[i] - '0'; | 
|  | 373 | } | 
|  | 374 | if (octval>255) | 
|  | 375 | { | 
|  | 376 | PyErr_SetString(ErrorObject, "octal value out of range"); | 
|  | 377 | return NULL; | 
|  | 378 | } | 
|  | 379 | *indexptr = i; | 
|  | 380 | return Py_BuildValue("c", (unsigned char)octval); | 
|  | 381 | } | 
|  | 382 | break; | 
|  | 383 | case('1'):    case('2'):    case('3'):    case('4'): | 
|  | 384 | case('5'):    case('6'):    case('7'):    case('8'): | 
|  | 385 | case('9'): | 
|  | 386 | { | 
|  | 387 | /* Handle \?, where ? is from 1 through 9 */ | 
|  | 388 | int value=0; | 
|  | 389 | index--; | 
|  | 390 | /* If it's at least a two-digit reference, like \34, it might | 
|  | 391 | either be a 3-digit octal escape (\123) or a 2-digit | 
|  | 392 | decimal memory reference (\34) */ | 
|  | 393 |  | 
|  | 394 | if ( (index+1) <pattern_len && | 
|  | 395 | (pcre_ctypes[ pattern[index+1] ] & ctype_digit) ) | 
|  | 396 | { | 
|  | 397 | if ( (index+2) <pattern_len && | 
|  | 398 | (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) && | 
|  | 399 | (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) && | 
|  | 400 | (pcre_ctypes[ pattern[index  ] ] & ctype_odigit) | 
|  | 401 | ) | 
|  | 402 | { | 
|  | 403 | /* 3 octal digits */ | 
|  | 404 | value= 8*8*(pattern[index  ]-'0') + | 
|  | 405 | 8*(pattern[index+1]-'0') + | 
|  | 406 | (pattern[index+2]-'0'); | 
|  | 407 | if (value>255) | 
|  | 408 | { | 
|  | 409 | PyErr_SetString(ErrorObject, "octal value out of range"); | 
|  | 410 | return NULL; | 
|  | 411 | } | 
|  | 412 | *indexptr = index+3; | 
|  | 413 | return Py_BuildValue("c", (unsigned char)value); | 
|  | 414 | } | 
|  | 415 | else | 
|  | 416 | { | 
|  | 417 | /* 2-digit form, so it's a memory reference */ | 
|  | 418 | value= 10*(pattern[index  ]-'0') + | 
|  | 419 | (pattern[index+1]-'0'); | 
|  | 420 | if (value<1 || EXTRACT_MAX<=value) | 
|  | 421 | { | 
|  | 422 | PyErr_SetString(ErrorObject, "memory reference out of range"); | 
|  | 423 | return NULL; | 
|  | 424 | } | 
|  | 425 | *typeptr = MEMORY_REFERENCE; | 
|  | 426 | *indexptr = index+2; | 
|  | 427 | return Py_BuildValue("i", value); | 
|  | 428 | } | 
|  | 429 | } | 
|  | 430 | else | 
|  | 431 | { | 
|  | 432 | /* Single-digit form, like \2, so it's a memory reference */ | 
|  | 433 | *typeptr = MEMORY_REFERENCE; | 
|  | 434 | *indexptr = index+1; | 
|  | 435 | return Py_BuildValue("i", pattern[index]-'0'); | 
|  | 436 | } | 
|  | 437 | } | 
|  | 438 | break; | 
|  | 439 |  | 
|  | 440 | default: | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 441 | /* It's some unknown escape like \s, so return a string containing | 
|  | 442 | \s */ | 
|  | 443 | *typeptr = STRING; | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 444 | *indexptr = index; | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 445 | return Py_BuildValue("s#", pattern+index-2, 2); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 446 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 447 | } | 
|  | 448 |  | 
|  | 449 | static PyObject * | 
|  | 450 | PyPcre_expand(self, args) | 
|  | 451 | PyObject *self; | 
|  | 452 | PyObject *args; | 
|  | 453 | { | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 454 | PyObject *results, *match_obj; | 
|  | 455 | PyObject *repl_obj, *newstring; | 
|  | 456 | unsigned char *repl; | 
|  | 457 | int size, total_len, i, start, pos; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 458 |  | 
| Guido van Rossum | 43713e5 | 2000-02-29 13:59:29 +0000 | [diff] [blame] | 459 | if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj)) | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 460 | return NULL; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 461 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 462 | repl=(unsigned char *)PyString_AsString(repl_obj); | 
|  | 463 | size=PyString_Size(repl_obj); | 
|  | 464 | results=PyList_New(0); | 
|  | 465 | if (results==NULL) return NULL; | 
|  | 466 | for(start=total_len=i=0; i<size; i++) | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 467 | { | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 468 | if (repl[i]=='\\') | 
|  | 469 | { | 
|  | 470 | PyObject *value; | 
|  | 471 | int escape_type; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 472 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 473 | if (start!=i) | 
|  | 474 | { | 
| Barry Warsaw | 6aa4257 | 1999-02-01 17:09:00 +0000 | [diff] [blame] | 475 | int status; | 
|  | 476 | PyObject *s = PyString_FromStringAndSize( | 
|  | 477 | (char *)repl+start, i-start); | 
|  | 478 | if (s == NULL) { | 
|  | 479 | Py_DECREF(results); | 
|  | 480 | return NULL; | 
|  | 481 | } | 
|  | 482 | status = PyList_Append(results, s); | 
|  | 483 | Py_DECREF(s); | 
|  | 484 | if (status < 0) { | 
|  | 485 | Py_DECREF(results); | 
|  | 486 | return NULL; | 
|  | 487 | } | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 488 | total_len += i-start; | 
|  | 489 | } | 
|  | 490 | i++; | 
|  | 491 | value=PyPcre_expand_escape(repl, size, &i, &escape_type); | 
|  | 492 | if (value==NULL) | 
|  | 493 | { | 
|  | 494 | /* PyPcre_expand_escape triggered an exception of some sort, | 
|  | 495 | so just return */ | 
|  | 496 | Py_DECREF(results); | 
|  | 497 | return NULL; | 
|  | 498 | } | 
|  | 499 | switch (escape_type) | 
|  | 500 | { | 
|  | 501 | case (CHAR): | 
|  | 502 | PyList_Append(results, value); | 
|  | 503 | total_len += PyString_Size(value); | 
|  | 504 | break; | 
|  | 505 | case(MEMORY_REFERENCE): | 
|  | 506 | { | 
|  | 507 | PyObject *r, *tuple, *result; | 
|  | 508 | r=PyObject_GetAttrString(match_obj, "group"); | 
| Guido van Rossum | 1a78553 | 1998-07-17 20:19:48 +0000 | [diff] [blame] | 509 | if (r == NULL) { | 
|  | 510 | Py_DECREF(results); | 
|  | 511 | return NULL; | 
|  | 512 | } | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 513 | tuple=PyTuple_New(1); | 
|  | 514 | Py_INCREF(value); | 
|  | 515 | PyTuple_SetItem(tuple, 0, value); | 
|  | 516 | result=PyEval_CallObject(r, tuple); | 
|  | 517 | Py_DECREF(r); Py_DECREF(tuple); | 
|  | 518 | if (result==NULL) | 
|  | 519 | { | 
| Guido van Rossum | 58132c6 | 1997-12-17 00:24:13 +0000 | [diff] [blame] | 520 | /* The group() method triggered an exception of some sort */ | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 521 | Py_DECREF(results); | 
|  | 522 | Py_DECREF(value); | 
|  | 523 | return NULL; | 
|  | 524 | } | 
|  | 525 | if (result==Py_None) | 
|  | 526 | { | 
|  | 527 | char message[50]; | 
|  | 528 | sprintf(message, | 
|  | 529 | "group did not contribute to the match"); | 
|  | 530 | PyErr_SetString(ErrorObject, | 
|  | 531 | message); | 
|  | 532 | Py_DECREF(result); | 
|  | 533 | Py_DECREF(value); | 
|  | 534 | Py_DECREF(results); | 
|  | 535 | return NULL; | 
|  | 536 | } | 
|  | 537 | /* typecheck that it's a string! */ | 
|  | 538 | if (!PyString_Check(result)) | 
|  | 539 | { | 
|  | 540 | Py_DECREF(results); | 
|  | 541 | Py_DECREF(result); | 
|  | 542 | PyErr_SetString(ErrorObject, | 
|  | 543 | "group() must return a string value for replacement"); | 
|  | 544 | return NULL; | 
|  | 545 | } | 
|  | 546 | PyList_Append(results, result); | 
|  | 547 | total_len += PyString_Size(result); | 
|  | 548 | Py_DECREF(result); | 
|  | 549 | } | 
|  | 550 | break; | 
| Guido van Rossum | 042ff9e | 1998-04-03 21:13:31 +0000 | [diff] [blame] | 551 | case(STRING): | 
|  | 552 | { | 
|  | 553 | PyList_Append(results, value); | 
|  | 554 | total_len += PyString_Size(value); | 
|  | 555 | break; | 
|  | 556 | } | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 557 | default: | 
|  | 558 | Py_DECREF(results); | 
|  | 559 | PyErr_SetString(ErrorObject, | 
|  | 560 | "bad escape in replacement"); | 
|  | 561 | return NULL; | 
|  | 562 | } | 
|  | 563 | Py_DECREF(value); | 
|  | 564 | start=i; | 
|  | 565 | i--; /* Decrement now, because the 'for' loop will increment it */ | 
|  | 566 | } | 
|  | 567 | } /* endif repl[i]!='\\' */ | 
|  | 568 |  | 
|  | 569 | if (start!=i) | 
|  | 570 | { | 
| Barry Warsaw | 6aa4257 | 1999-02-01 17:09:00 +0000 | [diff] [blame] | 571 | int status; | 
|  | 572 | PyObject *s = PyString_FromStringAndSize((char *)repl+start, | 
|  | 573 | i-start); | 
|  | 574 | if (s == NULL) { | 
|  | 575 | Py_DECREF(results); | 
|  | 576 | return NULL; | 
|  | 577 | } | 
|  | 578 | status = PyList_Append(results, s); | 
|  | 579 | Py_DECREF(s); | 
|  | 580 | if (status < 0) { | 
|  | 581 | Py_DECREF(results); | 
|  | 582 | return NULL; | 
|  | 583 | } | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 584 | total_len += i-start; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 585 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 586 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 587 | /* Whew!  Now we've constructed a list containing various pieces of | 
|  | 588 | strings that will make up our final result.  So, iterate over | 
|  | 589 | the list concatenating them.  A new string measuring total_len | 
|  | 590 | bytes is allocated and filled in. */ | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 591 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 592 | newstring=PyString_FromStringAndSize(NULL, total_len); | 
|  | 593 | if (newstring==NULL) | 
|  | 594 | { | 
|  | 595 | Py_DECREF(results); | 
|  | 596 | return NULL; | 
|  | 597 | } | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 598 |  | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 599 | repl=(unsigned char *)PyString_AsString(newstring); | 
|  | 600 | for (pos=i=0; i<PyList_Size(results); i++) | 
|  | 601 | { | 
|  | 602 | PyObject *item=PyList_GetItem(results, i); | 
|  | 603 | memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) ); | 
|  | 604 | pos += PyString_Size(item); | 
|  | 605 | } | 
|  | 606 | Py_DECREF(results); | 
|  | 607 | return newstring; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 608 | } | 
|  | 609 |  | 
|  | 610 |  | 
|  | 611 | /* List of functions defined in the module */ | 
|  | 612 |  | 
|  | 613 | static PyMethodDef pcre_methods[] = { | 
|  | 614 | {"pcre_compile",		PyPcre_compile,		1}, | 
|  | 615 | {"pcre_expand",		PyPcre_expand,		1}, | 
|  | 616 | {NULL,		NULL}		/* sentinel */ | 
|  | 617 | }; | 
|  | 618 |  | 
|  | 619 |  | 
|  | 620 | /* | 
|  | 621 | * Convenience routine to export an integer value. | 
|  | 622 | * For simplicity, errors (which are unlikely anyway) are ignored. | 
|  | 623 | */ | 
|  | 624 |  | 
|  | 625 | static void | 
|  | 626 | insint(d, name, value) | 
|  | 627 | PyObject * d; | 
|  | 628 | char * name; | 
|  | 629 | int value; | 
|  | 630 | { | 
|  | 631 | PyObject *v = PyInt_FromLong((long) value); | 
|  | 632 | if (v == NULL) { | 
|  | 633 | /* Don't bother reporting this error */ | 
|  | 634 | PyErr_Clear(); | 
|  | 635 | } | 
|  | 636 | else { | 
|  | 637 | PyDict_SetItemString(d, name, v); | 
|  | 638 | Py_DECREF(v); | 
|  | 639 | } | 
|  | 640 | } | 
|  | 641 |  | 
|  | 642 |  | 
|  | 643 | /* Initialization function for the module (*must* be called initpcre) */ | 
|  | 644 |  | 
| Guido van Rossum | 3886bb6 | 1998-12-04 18:50:17 +0000 | [diff] [blame] | 645 | DL_EXPORT(void) | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 646 | initpcre() | 
|  | 647 | { | 
|  | 648 | PyObject *m, *d; | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 649 |  | 
| Fred Drake | 0d40ba4 | 2000-02-04 20:33:49 +0000 | [diff] [blame] | 650 | Pcre_Type.ob_type = &PyType_Type; | 
|  | 651 |  | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 652 | /* Create the module and add the functions */ | 
|  | 653 | m = Py_InitModule("pcre", pcre_methods); | 
|  | 654 |  | 
|  | 655 | /* Add some symbolic constants to the module */ | 
|  | 656 | d = PyModule_GetDict(m); | 
| Fred Drake | 589c35b | 2000-07-06 19:38:49 +0000 | [diff] [blame^] | 657 | ErrorObject = PyErr_NewException("pcre.error", NULL, NULL); | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 658 | PyDict_SetItemString(d, "error", ErrorObject); | 
|  | 659 |  | 
|  | 660 | /* Insert the flags */ | 
|  | 661 | insint(d, "IGNORECASE", PCRE_CASELESS); | 
|  | 662 | insint(d, "ANCHORED", PCRE_ANCHORED); | 
|  | 663 | insint(d, "MULTILINE", PCRE_MULTILINE); | 
|  | 664 | insint(d, "DOTALL", PCRE_DOTALL); | 
|  | 665 | insint(d, "VERBOSE", PCRE_EXTENDED); | 
| Guido van Rossum | 5070060 | 1997-12-08 17:15:20 +0000 | [diff] [blame] | 666 | insint(d, "LOCALE", PCRE_LOCALE); | 
| Guido van Rossum | 51b3aa3 | 1997-10-06 14:43:11 +0000 | [diff] [blame] | 667 |  | 
|  | 668 | /* Check for errors */ | 
|  | 669 | if (PyErr_Occurred()) | 
|  | 670 | Py_FatalError("can't initialize module pcre"); | 
|  | 671 | } | 
|  | 672 |  |