/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-06-30 fl added fast search optimization
 * 2000-06-30 fl added assert (lookahead) primitives, etc
 * 2000-07-02 fl added charset optimizations, etc
 * 2000-07-03 fl store code in pattern object, lookbehind, etc
 * 2000-07-08 fl added regs attribute
 * 2000-07-21 fl reset lastindex in scanner methods
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-03 fl added recursion limit
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-08-08 fl changed findall to return empty strings instead of None
 * 2000-08-27 fl properly propagate memory errors
 * 2000-09-02 fl return -1 instead of None for start/end/span
 * 2000-09-20 fl added expand method
 * 2000-09-21 fl don't use the buffer interface for unicode strings
 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl really fixed assert_not; reset groups in findall
 * 2000-12-21 fl fixed memory leak in groupdict
 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
 * 2001-01-16 fl fixed memory leak in pattern destructor
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

45#ifndef SRE_RECURSIVE
46
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000047static char copyright[] =
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000048 " SRE 2.1.1 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000049
50#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000051#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000052
53#include "sre.h"
54
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000055#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000056
Fredrik Lundh436c3d582000-06-29 08:58:44 +000057/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000058#if !defined(SRE_MODULE)
59#define SRE_MODULE "sre"
60#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000061
Guido van Rossumb700df92000-03-31 14:59:30 +000062/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000064
Fredrik Lundh436c3d582000-06-29 08:58:44 +000065#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000066/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000067#define HAVE_UNICODE
68#endif
69
/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */

/* A fixed recursion limit is only configured when USE_STACKCHECK has not
   been defined by the build. */
#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

/* compatibility shim: map PyObject_DEL onto PyMem_DEL for Python
   versions older than 1.6 */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local function returning "type", using the
   cheapest calling/inlining form the compiler configuration allows. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */

/* TRACE takes a parenthesized printf argument list, e.g.
   TRACE(("x=%d\n", x)); it expands to nothing unless VERBOSE is defined */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* -------------------------------------------------------------------- */
/* search engine state */

/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored per character in sre_char_info */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* classification flags for character codes 0..127 */
static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };

/* lowercase mapping for character codes 0..127 (65..90 map to 97..122,
   everything else maps to itself) */
static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127 };

/* Each predicate yields a non-zero flag value (not necessarily 1) when the
   character is in the class, and 0 for any code >= 128.  The argument is
   evaluated twice and is used as an array index, so it must be free of
   side effects and non-negative. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* Lowercase a character code using the built-in 7-bit table; codes >= 128
   are returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? sre_char_lower[ch] : ch);
}

/* locale-specific character predicates */

/* These defer to the <ctype.h> classifiers for codes below 256 and report
   "not in class" for anything larger.  The argument is evaluated more than
   once, so it must be free of side effects. */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lowercase a character code via tolower() for codes below 256; larger
   codes are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? tolower((ch)) : ch);
}

/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)

/* thin wrappers over the Py_UNICODE_* classifiers; the character code is
   cast to Py_UNICODE before classification */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* Lowercase a character code via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif

Guido van Rossumb700df92000-03-31 14:59:30 +0000199LOCAL(int)
200sre_category(SRE_CODE category, unsigned int ch)
201{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000202 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000203
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000204 case SRE_CATEGORY_DIGIT:
205 return SRE_IS_DIGIT(ch);
206 case SRE_CATEGORY_NOT_DIGIT:
207 return !SRE_IS_DIGIT(ch);
208 case SRE_CATEGORY_SPACE:
209 return SRE_IS_SPACE(ch);
210 case SRE_CATEGORY_NOT_SPACE:
211 return !SRE_IS_SPACE(ch);
212 case SRE_CATEGORY_WORD:
213 return SRE_IS_WORD(ch);
214 case SRE_CATEGORY_NOT_WORD:
215 return !SRE_IS_WORD(ch);
216 case SRE_CATEGORY_LINEBREAK:
217 return SRE_IS_LINEBREAK(ch);
218 case SRE_CATEGORY_NOT_LINEBREAK:
219 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000220
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000221 case SRE_CATEGORY_LOC_WORD:
222 return SRE_LOC_IS_WORD(ch);
223 case SRE_CATEGORY_LOC_NOT_WORD:
224 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000225
226#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000227 case SRE_CATEGORY_UNI_DIGIT:
228 return SRE_UNI_IS_DIGIT(ch);
229 case SRE_CATEGORY_UNI_NOT_DIGIT:
230 return !SRE_UNI_IS_DIGIT(ch);
231 case SRE_CATEGORY_UNI_SPACE:
232 return SRE_UNI_IS_SPACE(ch);
233 case SRE_CATEGORY_UNI_NOT_SPACE:
234 return !SRE_UNI_IS_SPACE(ch);
235 case SRE_CATEGORY_UNI_WORD:
236 return SRE_UNI_IS_WORD(ch);
237 case SRE_CATEGORY_UNI_NOT_WORD:
238 return !SRE_UNI_IS_WORD(ch);
239 case SRE_CATEGORY_UNI_LINEBREAK:
240 return SRE_UNI_IS_LINEBREAK(ch);
241 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
242 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000243#else
244 case SRE_CATEGORY_UNI_DIGIT:
245 return SRE_IS_DIGIT(ch);
246 case SRE_CATEGORY_UNI_NOT_DIGIT:
247 return !SRE_IS_DIGIT(ch);
248 case SRE_CATEGORY_UNI_SPACE:
249 return SRE_IS_SPACE(ch);
250 case SRE_CATEGORY_UNI_NOT_SPACE:
251 return !SRE_IS_SPACE(ch);
252 case SRE_CATEGORY_UNI_WORD:
253 return SRE_LOC_IS_WORD(ch);
254 case SRE_CATEGORY_UNI_NOT_WORD:
255 return !SRE_LOC_IS_WORD(ch);
256 case SRE_CATEGORY_UNI_LINEBREAK:
257 return SRE_IS_LINEBREAK(ch);
258 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
259 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000260#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000261 }
262 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000263}
264
265/* helpers */
266
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000267static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000268mark_fini(SRE_STATE* state)
269{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000270 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000271 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000272 state->mark_stack = NULL;
273 }
274 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000275}
276
277static int
278mark_save(SRE_STATE* state, int lo, int hi)
279{
280 void* stack;
281 int size;
282 int minsize, newsize;
283
284 if (hi <= lo)
285 return 0;
286
287 size = (hi - lo) + 1;
288
289 newsize = state->mark_stack_size;
290 minsize = state->mark_stack_base + size;
291
292 if (newsize < minsize) {
293 /* create new stack */
294 if (!newsize) {
295 newsize = 512;
296 if (newsize < minsize)
297 newsize = minsize;
298 TRACE(("allocate stack %d\n", newsize));
299 stack = malloc(sizeof(void*) * newsize);
300 } else {
301 /* grow the stack */
302 while (newsize < minsize)
303 newsize += newsize;
304 TRACE(("grow stack to %d\n", newsize));
305 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
306 }
307 if (!stack) {
308 mark_fini(state);
309 return SRE_ERROR_MEMORY;
310 }
311 state->mark_stack = stack;
312 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000313 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000314
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000315 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000316
317 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
318 size * sizeof(void*));
319
320 state->mark_stack_base += size;
321
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000322 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000323}
324
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000325static int
326mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000327{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000328 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000329
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000330 if (hi <= lo)
331 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000332
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000333 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000334
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000335 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000336
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000337 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000338
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000339 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
340 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000341
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000343}
344
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000345/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000346
347#define SRE_CHAR unsigned char
348#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000349#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000350#define SRE_CHARSET sre_charset
351#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000352#define SRE_MATCH sre_match
353#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000354
355#if defined(HAVE_UNICODE)
356
Guido van Rossumb700df92000-03-31 14:59:30 +0000357#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000358#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000359#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000360
Guido van Rossumb700df92000-03-31 14:59:30 +0000361#undef SRE_SEARCH
362#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000363#undef SRE_INFO
364#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000365#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000366#undef SRE_AT
367#undef SRE_CHAR
368
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000369/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000370
371#define SRE_CHAR Py_UNICODE
372#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000373#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000374#define SRE_CHARSET sre_ucharset
375#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000376#define SRE_MATCH sre_umatch
377#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000378#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000379
380#endif /* SRE_RECURSIVE */
381
/* -------------------------------------------------------------------- */
/* String matching engine */

/* the following section is compiled twice, with different character
   settings */

388LOCAL(int)
389SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
390{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000391 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000396
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000398 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000400
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 case SRE_AT_BEGINNING_LINE:
402 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000403 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000404
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000405 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000406 return (((void*) (ptr+1) == state->end &&
407 SRE_IS_LINEBREAK((int) ptr[0])) ||
408 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 case SRE_AT_END_LINE:
411 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000412 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000413
Fredrik Lundh770617b2001-01-14 15:06:11 +0000414 case SRE_AT_END_STRING:
415 return ((void*) ptr == state->end);
416
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000417 case SRE_AT_BOUNDARY:
418 if (state->beginning == state->end)
419 return 0;
420 that = ((void*) ptr > state->beginning) ?
421 SRE_IS_WORD((int) ptr[-1]) : 0;
422 this = ((void*) ptr < state->end) ?
423 SRE_IS_WORD((int) ptr[0]) : 0;
424 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000425
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000426 case SRE_AT_NON_BOUNDARY:
427 if (state->beginning == state->end)
428 return 0;
429 that = ((void*) ptr > state->beginning) ?
430 SRE_IS_WORD((int) ptr[-1]) : 0;
431 this = ((void*) ptr < state->end) ?
432 SRE_IS_WORD((int) ptr[0]) : 0;
433 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000434
435 case SRE_AT_LOC_BOUNDARY:
436 if (state->beginning == state->end)
437 return 0;
438 that = ((void*) ptr > state->beginning) ?
439 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
440 this = ((void*) ptr < state->end) ?
441 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
442 return this != that;
443
444 case SRE_AT_LOC_NON_BOUNDARY:
445 if (state->beginning == state->end)
446 return 0;
447 that = ((void*) ptr > state->beginning) ?
448 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
449 this = ((void*) ptr < state->end) ?
450 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
451 return this == that;
452
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000453#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000454 case SRE_AT_UNI_BOUNDARY:
455 if (state->beginning == state->end)
456 return 0;
457 that = ((void*) ptr > state->beginning) ?
458 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
459 this = ((void*) ptr < state->end) ?
460 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
461 return this != that;
462
463 case SRE_AT_UNI_NON_BOUNDARY:
464 if (state->beginning == state->end)
465 return 0;
466 that = ((void*) ptr > state->beginning) ?
467 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
468 this = ((void*) ptr < state->end) ?
469 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
470 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000471#endif
472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000473 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000474
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000475 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000476}
477
478LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000479SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000480{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000481 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000482
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000483 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000484
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000485 for (;;) {
486 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000487
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000488 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000489 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000490 if (ch == set[0])
491 return ok;
492 set++;
493 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000494
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000495 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000496 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000497 if (set[0] <= ch && ch <= set[1])
498 return ok;
499 set += 2;
500 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000501
Fredrik Lundh3562f112000-07-02 12:00:07 +0000502 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000503 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000504 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
505 return ok;
506 set += 16;
507 break;
508
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000509 case SRE_OP_BIGCHARSET:
510 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
511 {
512 int count, block;
513 count = *(set++);
514 block = ((unsigned char*)set)[ch >> 8];
515 set += 128;
516 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
517 return ok;
518 set += count*16;
519 break;
520 }
521
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000522 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000523 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000524 if (sre_category(set[0], (int) ch))
525 return ok;
526 set += 1;
527 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000528
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000529 case SRE_OP_NEGATE:
530 ok = !ok;
531 break;
532
533 case SRE_OP_FAILURE:
534 return !ok;
535
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000536 default:
537 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000538 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000539 return 0;
540 }
541 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000542}
543
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000544LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
545
546LOCAL(int)
547SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
548{
549 SRE_CODE chr;
550 SRE_CHAR* ptr = state->ptr;
551 SRE_CHAR* end = state->end;
552 int i;
553
554 /* adjust end */
555 if (maxcount < end - ptr && maxcount != 65535)
556 end = ptr + maxcount;
557
558 switch (pattern[0]) {
559
560 case SRE_OP_ANY:
561 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000562 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000563 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
564 ptr++;
565 break;
566
567 case SRE_OP_ANY_ALL:
568 /* repeated dot wildcare. skip to the end of the target
569 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000570 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000571 ptr = end;
572 break;
573
574 case SRE_OP_LITERAL:
575 /* repeated literal */
576 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000577 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000578 while (ptr < end && (SRE_CODE) *ptr == chr)
579 ptr++;
580 break;
581
582 case SRE_OP_LITERAL_IGNORE:
583 /* repeated literal */
584 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000585 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000586 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
587 ptr++;
588 break;
589
590 case SRE_OP_NOT_LITERAL:
591 /* repeated non-literal */
592 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000593 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000594 while (ptr < end && (SRE_CODE) *ptr != chr)
595 ptr++;
596 break;
597
598 case SRE_OP_NOT_LITERAL_IGNORE:
599 /* repeated non-literal */
600 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000601 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000602 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
603 ptr++;
604 break;
605
606 case SRE_OP_IN:
607 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000608 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
609 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000610 ptr++;
611 break;
612
613 default:
614 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000615 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000616 while ((SRE_CHAR*) state->ptr < end) {
617 i = SRE_MATCH(state, pattern, level);
618 if (i < 0)
619 return i;
620 if (!i)
621 break;
622 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000623 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
624 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000625 return (SRE_CHAR*) state->ptr - ptr;
626 }
627
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000628 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000629 return ptr - (SRE_CHAR*) state->ptr;
630}
631
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif

660LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000661SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000662{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000663 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000664 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000665
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000666 SRE_CHAR* end = state->end;
667 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000668 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000669 SRE_REPEAT* rp;
670 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000671 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000672
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000673 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000675 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000676
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000677#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000678 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000679 return SRE_ERROR_RECURSION_LIMIT;
680#endif
681
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000682#if defined(USE_RECURSION_LIMIT)
683 if (level > USE_RECURSION_LIMIT)
684 return SRE_ERROR_RECURSION_LIMIT;
685#endif
686
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000687 if (pattern[0] == SRE_OP_INFO) {
688 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000689 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000690 if (pattern[3] && (end - ptr) < pattern[3]) {
691 TRACE(("reject (got %d chars, need %d)\n",
692 (end - ptr), pattern[3]));
693 return 0;
694 }
695 pattern += pattern[1] + 1;
696 }
697
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000699
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000700 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000701
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000702 case SRE_OP_FAILURE:
703 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000704 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000705 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000706
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000707 case SRE_OP_SUCCESS:
708 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000709 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000711 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000712
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000713 case SRE_OP_AT:
714 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000715 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000716 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000717 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000718 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000719 pattern++;
720 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000721
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000722 case SRE_OP_CATEGORY:
723 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000724 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000725 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000726 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000727 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000728 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000729 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000730 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000731
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 case SRE_OP_LITERAL:
733 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000734 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000735 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000737 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 pattern++;
739 ptr++;
740 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000741
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 case SRE_OP_NOT_LITERAL:
743 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000744 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000745 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000747 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 pattern++;
749 ptr++;
750 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000753 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000754 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000755 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000756 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
757 return 0;
758 ptr++;
759 break;
760
761 case SRE_OP_ANY_ALL:
762 /* match anything */
763 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000764 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000765 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000766 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 ptr++;
768 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000769
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000770 case SRE_OP_IN:
771 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000772 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000773 TRACE(("|%p|%p|IN\n", pattern, ptr));
774 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000775 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000776 pattern += pattern[0];
777 ptr++;
778 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000779
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000780 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000781 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000782 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000783 i = pattern[0];
784 {
785 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
786 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
787 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000788 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000789 while (p < e) {
790 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000791 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 p++; ptr++;
793 }
794 }
795 pattern++;
796 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000797
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000798 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000799 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000800 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000801 i = pattern[0];
802 {
803 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
804 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
805 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000806 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000807 while (p < e) {
808 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000809 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000810 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000811 p++; ptr++;
812 }
813 }
814 pattern++;
815 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000816
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000818 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000819 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000820 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000821 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000822 pattern++;
823 ptr++;
824 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000825
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000826 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000827 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000828 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000829 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000830 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000831 pattern++;
832 ptr++;
833 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000834
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000835 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000836 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000837 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000838 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000839 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000840 pattern += pattern[0];
841 ptr++;
842 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000843
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000844 case SRE_OP_MARK:
845 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000846 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000847 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000848 i = pattern[0];
849 if (i & 1)
850 state->lastindex = i/2 + 1;
851 if (i > state->lastmark)
852 state->lastmark = i;
853 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000854 pattern++;
855 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000857 case SRE_OP_JUMP:
858 case SRE_OP_INFO:
859 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000860 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000861 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000862 pattern += pattern[0];
863 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000864
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000865 case SRE_OP_ASSERT:
866 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000867 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000868 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000869 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000870 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000871 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000872 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000873 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000874 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000875 pattern += pattern[0];
876 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000877
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000878 case SRE_OP_ASSERT_NOT:
879 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000880 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000881 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000882 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000883 if (state->ptr >= state->beginning) {
884 i = SRE_MATCH(state, pattern + 2, level + 1);
885 if (i < 0)
886 return i;
887 if (i)
888 return 0;
889 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000890 pattern += pattern[0];
891 break;
892
893 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000894 /* alternation */
895 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000896 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000897 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000898 for (; pattern[0]; pattern += pattern[0]) {
899 if (pattern[1] == SRE_OP_LITERAL &&
900 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
901 continue;
902 if (pattern[1] == SRE_OP_IN &&
903 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
904 continue;
905 state->ptr = ptr;
906 i = SRE_MATCH(state, pattern + 1, level + 1);
907 if (i)
908 return i;
909 if (state->lastmark > lastmark) {
910 memset(
911 state->mark + lastmark + 1, 0,
912 (state->lastmark - lastmark) * sizeof(void*)
913 );
914 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000915 }
916 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000917 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000918
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000919 case SRE_OP_REPEAT_ONE:
920 /* match repeated sequence (maximizing regexp) */
921
922 /* this operator only works if the repeated item is
923 exactly one character wide, and we're not already
924 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000925 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000926
927 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
928
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000929 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000930 pattern[1], pattern[2]));
931
Fredrik Lundhe1869832000-08-01 22:47:49 +0000932 if (ptr + pattern[1] > end)
933 return 0; /* cannot match */
934
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000935 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000936
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000937 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
938 if (count < 0)
939 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000940
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000941 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000942
943 /* when we arrive here, count contains the number of
944 matches, and ptr points to the tail of the target
945 string. check if the rest of the pattern matches,
946 and backtrack if not. */
947
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000948 if (count < (int) pattern[1])
949 return 0;
950
951 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
952 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000953 state->ptr = ptr;
954 return 1;
955
956 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
957 /* tail starts with a literal. skip positions where
958 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000959 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000960 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000961 while (count >= (int) pattern[1] &&
962 (ptr >= end || *ptr != chr)) {
963 ptr--;
964 count--;
965 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000966 if (count < (int) pattern[1])
967 break;
968 state->ptr = ptr;
969 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000970 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000971 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000972 ptr--;
973 count--;
974 }
975
976 } else {
977 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000978 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000979 while (count >= (int) pattern[1]) {
980 state->ptr = ptr;
981 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000982 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000983 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000984 ptr--;
985 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000986 if (state->lastmark > lastmark) {
987 memset(
988 state->mark + lastmark + 1, 0,
989 (state->lastmark - lastmark) * sizeof(void*)
990 );
991 state->lastmark = lastmark;
992 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000993 }
994 }
995 return 0;
996
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000997 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000998 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +0000999 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001000 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001001 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001002 pattern[1], pattern[2]));
1003
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001004 rep.count = -1;
1005 rep.pattern = pattern;
1006
1007 /* install new repeat context */
1008 rep.prev = state->repeat;
1009 state->repeat = &rep;
1010
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001011 state->ptr = ptr;
1012 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001013
1014 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001015
1016 return i;
1017
1018 case SRE_OP_MAX_UNTIL:
1019 /* maximizing repeat */
1020 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1021
1022 /* FIXME: we probably need to deal with zero-width
1023 matches in here... */
1024
1025 rp = state->repeat;
1026 if (!rp)
1027 return SRE_ERROR_STATE;
1028
1029 state->ptr = ptr;
1030
1031 count = rp->count + 1;
1032
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001033 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001034
1035 if (count < rp->pattern[1]) {
1036 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001037 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001038 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001039 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001040 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001041 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001042 rp->count = count - 1;
1043 state->ptr = ptr;
1044 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001045 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001046
1047 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001048 /* we may have enough matches, but if we can
1049 match another item, do so */
1050 rp->count = count;
1051 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001052 i = mark_save(state, 0, lastmark);
1053 if (i < 0)
1054 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001055 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001056 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001057 if (i)
1058 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001059 i = mark_restore(state, 0, lastmark);
1060 if (i < 0)
1061 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001062 rp->count = count - 1;
1063 state->ptr = ptr;
1064 }
1065
1066 /* cannot match more repeated items here. make sure the
1067 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001068 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001069 i = SRE_MATCH(state, pattern, level + 1);
1070 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001071 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001072 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001073 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001074 return 0;
1075
1076 case SRE_OP_MIN_UNTIL:
1077 /* minimizing repeat */
1078 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1079
1080 rp = state->repeat;
1081 if (!rp)
1082 return SRE_ERROR_STATE;
1083
1084 count = rp->count + 1;
1085
Fredrik Lundh770617b2001-01-14 15:06:11 +00001086 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1087 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001088
1089 state->ptr = ptr;
1090
1091 if (count < rp->pattern[1]) {
1092 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001093 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001094 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001095 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001096 if (i)
1097 return i;
1098 rp->count = count-1;
1099 state->ptr = ptr;
1100 return 0;
1101 }
1102
1103 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001104 state->repeat = rp->prev;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +00001105 /* FIXME: the following fix doesn't always work (#133283) */
1106 if (0 && rp->pattern[2] == 65535) {
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001107 /* unbounded repeat */
1108 for (;;) {
1109 i = SRE_MATCH(state, pattern, level + 1);
1110 if (i || ptr >= end)
1111 break;
1112 state->ptr = ++ptr;
1113 }
1114 } else
1115 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001116 if (i) {
1117 /* free(rp); */
1118 return i;
1119 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001120
Fredrik Lundh770617b2001-01-14 15:06:11 +00001121 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001122 state->repeat = rp;
1123
1124 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1125 return 0;
1126
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001127 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001128 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001129 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001130 if (i)
1131 return i;
1132 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001133 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001134 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001135
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001136 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001137 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001138 return SRE_ERROR_ILLEGAL;
1139 }
1140 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001141
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001142 /* shouldn't end up here */
1143 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001144}
1145
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001146LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001147SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1148{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001149 SRE_CHAR* ptr = state->start;
1150 SRE_CHAR* end = state->end;
1151 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001152 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001153 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001154 SRE_CODE* prefix = NULL;
1155 SRE_CODE* charset = NULL;
1156 SRE_CODE* overlap = NULL;
1157 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001158
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001159 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001160 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001161 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001162
1163 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001164
1165 if (pattern[3] > 0) {
1166 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001167 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001168 end -= pattern[3]-1;
1169 if (end <= ptr)
1170 end = ptr+1;
1171 }
1172
Fredrik Lundh3562f112000-07-02 12:00:07 +00001173 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001174 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001175 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001176 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001177 prefix_skip = pattern[6];
1178 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001179 overlap = prefix + prefix_len - 1;
1180 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001181 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001182 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001183 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001184
1185 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001186 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001187
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001188 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1189 TRACE(("charset = %p\n", charset));
1190
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001191#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001192 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001193 /* pattern starts with a known prefix. use the overlap
1194 table to skip forward as fast as we possibly can */
1195 int i = 0;
1196 end = state->end;
1197 while (ptr < end) {
1198 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001199 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001200 if (!i)
1201 break;
1202 else
1203 i = overlap[i];
1204 } else {
1205 if (++i == prefix_len) {
1206 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001207 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1208 state->start = ptr + 1 - prefix_len;
1209 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001210 if (flags & SRE_INFO_LITERAL)
1211 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001212 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001213 if (status != 0)
1214 return status;
1215 /* close but no cigar -- try again */
1216 i = overlap[i];
1217 }
1218 break;
1219 }
1220
1221 }
1222 ptr++;
1223 }
1224 return 0;
1225 }
1226#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001227
Fredrik Lundh3562f112000-07-02 12:00:07 +00001228 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001229 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001230 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001231 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001232 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001233 for (;;) {
1234 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1235 ptr++;
1236 if (ptr == end)
1237 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001238 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001239 state->start = ptr;
1240 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001241 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 if (status != 0)
1243 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001244 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001245 } else if (charset) {
1246 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001247 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001248 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001249 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001250 ptr++;
1251 if (ptr == end)
1252 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001253 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001254 state->start = ptr;
1255 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001256 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001257 if (status != 0)
1258 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001259 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001260 }
1261 } else
1262 /* general case */
1263 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001264 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001265 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001266 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001267 if (status != 0)
1268 break;
1269 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001270
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001271 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001272}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001273
Guido van Rossumb700df92000-03-31 14:59:30 +00001274
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001275#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001276
1277/* -------------------------------------------------------------------- */
1278/* factories and destructors */
1279
1280/* see sre.h for object declarations */
1281
1282staticforward PyTypeObject Pattern_Type;
1283staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001284staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001285
1286static PyObject *
1287_compile(PyObject* self_, PyObject* args)
1288{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001289 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001290
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001291 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001292 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001293
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001294 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001295 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001296 PyObject* code;
1297 int groups = 0;
1298 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001299 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001300 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1301 &PyList_Type, &code, &groups,
1302 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001303 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001304
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001305 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001306
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001307 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001308 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001309 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001310
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001311 self->codesize = n;
1312
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001313 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001314 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001315 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001316 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001317
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001318 if (PyErr_Occurred()) {
1319 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001320 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001321 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001322
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001323 Py_INCREF(pattern);
1324 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001325
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001326 self->flags = flags;
1327
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001328 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001329
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001330 Py_XINCREF(groupindex);
1331 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001332
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001333 Py_XINCREF(indexgroup);
1334 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001335
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001336 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001337}
1338
1339static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001340sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001341{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001342 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001343}
1344
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001345static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001346sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001347{
1348 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001349 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001350 return NULL;
1351 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001352 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001353 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001354#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001355 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001356#else
1357 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001358#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001359 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001360}
1361
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001362LOCAL(void)
1363state_reset(SRE_STATE* state)
1364{
1365 int i;
1366
1367 state->lastmark = 0;
1368
1369 /* FIXME: dynamic! */
1370 for (i = 0; i < SRE_MARK_SIZE; i++)
1371 state->mark[i] = NULL;
1372
1373 state->lastindex = -1;
1374
1375 state->repeat = NULL;
1376
1377 mark_fini(state);
1378}
1379
Guido van Rossumb700df92000-03-31 14:59:30 +00001380LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001381state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1382 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001383{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001384 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001385
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001386 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001387 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001388 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001389
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001390 memset(state, 0, sizeof(SRE_STATE));
1391
1392 state->lastindex = -1;
1393
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001394#if defined(HAVE_UNICODE)
1395 if (PyUnicode_Check(string)) {
1396 /* unicode strings doesn't always support the buffer interface */
1397 ptr = (void*) PyUnicode_AS_DATA(string);
1398 bytes = PyUnicode_GET_DATA_SIZE(string);
1399 size = PyUnicode_GET_SIZE(string);
1400 state->charsize = sizeof(Py_UNICODE);
1401
1402 } else {
1403#endif
1404
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001405 /* get pointer to string buffer */
1406 buffer = string->ob_type->tp_as_buffer;
1407 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1408 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001409 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001410 return NULL;
1411 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001413 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001414 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1415 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001416 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1417 return NULL;
1418 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001420 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001421#if PY_VERSION_HEX >= 0x01060000
1422 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001423#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001424 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001425#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001426
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001427 if (PyString_Check(string) || bytes == size)
1428 state->charsize = 1;
1429#if defined(HAVE_UNICODE)
1430 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1431 state->charsize = sizeof(Py_UNICODE);
1432#endif
1433 else {
1434 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1435 return NULL;
1436 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001437
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001438#if defined(HAVE_UNICODE)
1439 }
1440#endif
1441
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001442 /* adjust boundaries */
1443 if (start < 0)
1444 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001445 else if (start > size)
1446 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001447
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001448 if (end < 0)
1449 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001450 else if (end > size)
1451 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001452
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001453 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001454
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001455 state->start = (void*) ((char*) ptr + start * state->charsize);
1456 state->end = (void*) ((char*) ptr + end * state->charsize);
1457
1458 Py_INCREF(string);
1459 state->string = string;
1460 state->pos = start;
1461 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001462
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001463 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001464 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001465 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001466#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001467 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001468#else
1469 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001470#endif
1471 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001472 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001473
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001474 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001475}
1476
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001477LOCAL(void)
1478state_fini(SRE_STATE* state)
1479{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001480 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001481 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001482}
1483
1484LOCAL(PyObject*)
1485state_getslice(SRE_STATE* state, int index, PyObject* string)
1486{
Fredrik Lundh58100642000-08-09 09:14:35 +00001487 int i, j;
1488
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001489 index = (index - 1) * 2;
1490
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001491 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001492 i = j = 0;
1493 } else {
1494 i = ((char*)state->mark[index] - (char*)state->beginning) /
1495 state->charsize;
1496 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1497 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001498 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001499
Fredrik Lundh58100642000-08-09 09:14:35 +00001500 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001501}
1502
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001503static void
1504pattern_error(int status)
1505{
1506 switch (status) {
1507 case SRE_ERROR_RECURSION_LIMIT:
1508 PyErr_SetString(
1509 PyExc_RuntimeError,
1510 "maximum recursion limit exceeded"
1511 );
1512 break;
1513 case SRE_ERROR_MEMORY:
1514 PyErr_NoMemory();
1515 break;
1516 default:
1517 /* other error codes indicate compiler/engine bugs */
1518 PyErr_SetString(
1519 PyExc_RuntimeError,
1520 "internal error in regular expression engine"
1521 );
1522 }
1523}
1524
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001525static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001526pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001527{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001528 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001529
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001530 MatchObject* match;
1531 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001532 char* base;
1533 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001534
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001535 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001536
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001537 /* create match object (with room for extra group marks) */
1538 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001539 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001540 if (!match)
1541 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001542
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001543 Py_INCREF(pattern);
1544 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001546 Py_INCREF(state->string);
1547 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001548
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001549 match->regs = NULL;
1550 match->groups = pattern->groups+1;
1551
1552 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001553
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001554 base = (char*) state->beginning;
1555 n = state->charsize;
1556
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001557 match->mark[0] = ((char*) state->start - base) / n;
1558 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001559
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001560 for (i = j = 0; i < pattern->groups; i++, j+=2)
1561 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1562 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1563 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1564 } else
1565 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1566
1567 match->pos = state->pos;
1568 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001569
Fredrik Lundh6f013982000-07-03 18:44:21 +00001570 match->lastindex = state->lastindex;
1571
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001573
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001574 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001575
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001576 /* no match */
1577 Py_INCREF(Py_None);
1578 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001579
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001580 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001581
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001582 /* internal error */
1583 pattern_error(status);
1584 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001585}
1586
1587static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001588pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001589{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001590 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001591
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001592 ScannerObject* self;
1593
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001594 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001595 int start = 0;
1596 int end = INT_MAX;
1597 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1598 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001599
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001600 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001601 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001602 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001603 return NULL;
1604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001605 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001606 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001607 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001608 return NULL;
1609 }
1610
1611 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001612 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001613
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001614 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001615}
1616
Guido van Rossumb700df92000-03-31 14:59:30 +00001617static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001618pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001619{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001620 Py_XDECREF(self->pattern);
1621 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001622 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001623 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001624}
1625
1626static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001627pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001628{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001629 SRE_STATE state;
1630 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001631
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001632 PyObject* string;
1633 int start = 0;
1634 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001635 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1636 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1637 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001638 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001639
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001640 string = state_init(&state, self, string, start, end);
1641 if (!string)
1642 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001643
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001644 state.ptr = state.start;
1645
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001646 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001648 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001649 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001650 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001651#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001652 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001653#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001654 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001655
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001656 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1657
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001658 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001659
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001660 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001661}
1662
1663static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001664pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001665{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001666 SRE_STATE state;
1667 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001668
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001669 PyObject* string;
1670 int start = 0;
1671 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001672 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1673 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1674 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001675 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001676
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001677 string = state_init(&state, self, string, start, end);
1678 if (!string)
1679 return NULL;
1680
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001681 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1682
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001683 if (state.charsize == 1) {
1684 status = sre_search(&state, PatternObject_GetCode(self));
1685 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001686#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001687 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001688#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001689 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001690
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001691 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1692
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001694
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001695 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001696}
1697
1698static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001699call(char* function, PyObject* args)
1700{
1701 PyObject* name;
1702 PyObject* module;
1703 PyObject* func;
1704 PyObject* result;
1705
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001706 name = PyString_FromString(SRE_MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001707 if (!name)
1708 return NULL;
1709 module = PyImport_Import(name);
1710 Py_DECREF(name);
1711 if (!module)
1712 return NULL;
1713 func = PyObject_GetAttrString(module, function);
1714 Py_DECREF(module);
1715 if (!func)
1716 return NULL;
1717 result = PyObject_CallObject(func, args);
1718 Py_DECREF(func);
1719 Py_DECREF(args);
1720 return result;
1721}
1722
1723static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001724pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001725{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001726 PyObject* template;
1727 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001728 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001729 static char* kwlist[] = { "repl", "string", "count", NULL };
1730 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1731 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001732 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001733
1734 /* delegate to Python code */
1735 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1736}
1737
1738static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001739pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001740{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001741 PyObject* template;
1742 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001743 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001744 static char* kwlist[] = { "repl", "string", "count", NULL };
1745 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1746 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001747 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001748
1749 /* delegate to Python code */
1750 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1751}
1752
1753static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001754pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001755{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001756 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001757 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001758 static char* kwlist[] = { "source", "maxsplit", NULL };
1759 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1760 &string, &maxsplit))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001761 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001762
1763 /* delegate to Python code */
1764 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1765}
1766
1767static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001768pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001769{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001770 SRE_STATE state;
1771 PyObject* list;
1772 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001773 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001774
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001775 PyObject* string;
1776 int start = 0;
1777 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001778 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1779 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1780 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001781 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001782
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001783 string = state_init(&state, self, string, start, end);
1784 if (!string)
1785 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001786
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001787 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001788
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001789 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001790
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001791 PyObject* item;
1792
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001793 state_reset(&state);
1794
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001795 state.ptr = state.start;
1796
1797 if (state.charsize == 1) {
1798 status = sre_search(&state, PatternObject_GetCode(self));
1799 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001800#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001801 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001802#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001803 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001804
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001805 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001806
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001807 /* don't bother to build a match object */
1808 switch (self->groups) {
1809 case 0:
1810 item = PySequence_GetSlice(
1811 string,
1812 ((char*) state.start - (char*) state.beginning) /
1813 state.charsize,
1814 ((char*) state.ptr - (char*) state.beginning) /
1815 state.charsize);
1816 if (!item)
1817 goto error;
1818 break;
1819 case 1:
1820 item = state_getslice(&state, 1, string);
1821 if (!item)
1822 goto error;
1823 break;
1824 default:
1825 item = PyTuple_New(self->groups);
1826 if (!item)
1827 goto error;
1828 for (i = 0; i < self->groups; i++) {
1829 PyObject* o = state_getslice(&state, i+1, string);
1830 if (!o) {
1831 Py_DECREF(item);
1832 goto error;
1833 }
1834 PyTuple_SET_ITEM(item, i, o);
1835 }
1836 break;
1837 }
1838
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001839 status = PyList_Append(list, item);
1840 Py_DECREF(item);
1841
1842 if (status < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001843 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001845 if (state.ptr == state.start)
1846 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001847 else
1848 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001849
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001850 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001851
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001852 if (status == 0)
1853 break;
1854
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001855 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001856 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001857
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001858 }
1859 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001860
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001861 state_fini(&state);
1862 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001863
1864error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001865 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001866 state_fini(&state);
1867 return NULL;
1868
Guido van Rossumb700df92000-03-31 14:59:30 +00001869}
1870
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001871static PyObject*
1872pattern_copy(PatternObject* self, PyObject* args)
1873{
1874#if USE_BUILTIN_COPY
1875 PatternObject* copy;
1876 int offset;
1877
1878 /* work in progress */
1879
1880 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1881 if (!copy)
1882 return NULL;
1883
1884 offset = offsetof(PatternObject, groups);
1885
1886 Py_XINCREF(self->groupindex);
1887 Py_XINCREF(self->indexgroup);
1888 Py_XINCREF(self->pattern);
1889
1890 memcpy((char*) copy + offset, (char*) self + offset,
1891 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
1892
1893 return (PyObject*) copy;
1894#else
1895 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1896 return NULL;
1897#endif
1898}
1899
1900static PyObject*
1901pattern_deepcopy(PatternObject* self, PyObject* args)
1902{
1903 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1904 return NULL;
1905}
1906
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001907static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00001908 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1909 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1910 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1911 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1912 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1913 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh562586e2000-10-03 20:43:34 +00001914 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001915 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
1916 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001917 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001918};
1919
1920static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001921pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001922{
1923 PyObject* res;
1924
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001925 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001926
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001927 if (res)
1928 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001929
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001930 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001931
1932 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001933 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001934 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001935 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001936 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001937
1938 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001939 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001940
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001941 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001942 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001943
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001944 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001945 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001946 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001947 }
1948
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001949 PyErr_SetString(PyExc_AttributeError, name);
1950 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001951}
1952
1953statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001954 PyObject_HEAD_INIT(NULL)
1955 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001956 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001957 (destructor)pattern_dealloc, /*tp_dealloc*/
1958 0, /*tp_print*/
1959 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001960};
1961
1962/* -------------------------------------------------------------------- */
1963/* match methods */
1964
1965static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001966match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001967{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001968 Py_XDECREF(self->regs);
1969 Py_XDECREF(self->string);
1970 Py_DECREF(self->pattern);
1971 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001972}
1973
1974static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001975match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001976{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001977 if (index < 0 || index >= self->groups) {
1978 /* raise IndexError if we were given a bad group number */
1979 PyErr_SetString(
1980 PyExc_IndexError,
1981 "no such group"
1982 );
1983 return NULL;
1984 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001985
Fredrik Lundh6f013982000-07-03 18:44:21 +00001986 index *= 2;
1987
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001988 if (self->string == Py_None || self->mark[index] < 0) {
1989 /* return default value if the string or group is undefined */
1990 Py_INCREF(def);
1991 return def;
1992 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001993
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001994 return PySequence_GetSlice(
1995 self->string, self->mark[index], self->mark[index+1]
1996 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001997}
1998
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001999static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002000match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002001{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002002 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002003
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002004 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002005 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002006
Fredrik Lundh6f013982000-07-03 18:44:21 +00002007 i = -1;
2008
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002009 if (self->pattern->groupindex) {
2010 index = PyObject_GetItem(self->pattern->groupindex, index);
2011 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002012 if (PyInt_Check(index))
2013 i = (int) PyInt_AS_LONG(index);
2014 Py_DECREF(index);
2015 } else
2016 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002017 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002018
2019 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002020}
2021
2022static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002023match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002024{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002025 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002026}
2027
2028static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002029match_expand(MatchObject* self, PyObject* args)
2030{
2031 PyObject* template;
2032 if (!PyArg_ParseTuple(args, "O:expand", &template))
2033 return NULL;
2034
2035 /* delegate to Python code */
2036 return call(
2037 "_expand",
2038 Py_BuildValue("OOO", self->pattern, self, template)
2039 );
2040}
2041
2042static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002043match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002044{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 PyObject* result;
2046 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002047
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002048 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002049
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002050 switch (size) {
2051 case 0:
2052 result = match_getslice(self, Py_False, Py_None);
2053 break;
2054 case 1:
2055 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2056 break;
2057 default:
2058 /* fetch multiple items */
2059 result = PyTuple_New(size);
2060 if (!result)
2061 return NULL;
2062 for (i = 0; i < size; i++) {
2063 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002064 self, PyTuple_GET_ITEM(args, i), Py_None
2065 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 if (!item) {
2067 Py_DECREF(result);
2068 return NULL;
2069 }
2070 PyTuple_SET_ITEM(result, i, item);
2071 }
2072 break;
2073 }
2074 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002075}
2076
2077static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002078match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002079{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002080 PyObject* result;
2081 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002082
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002083 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002084 static char* kwlist[] = { "default", NULL };
2085 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002086 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002087
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002088 result = PyTuple_New(self->groups-1);
2089 if (!result)
2090 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002091
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002092 for (index = 1; index < self->groups; index++) {
2093 PyObject* item;
2094 item = match_getslice_by_index(self, index, def);
2095 if (!item) {
2096 Py_DECREF(result);
2097 return NULL;
2098 }
2099 PyTuple_SET_ITEM(result, index-1, item);
2100 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002101
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002102 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002103}
2104
2105static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002106match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002107{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002108 PyObject* result;
2109 PyObject* keys;
2110 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002111
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002112 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002113 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002114 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002115 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002116
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002117 result = PyDict_New();
2118 if (!result || !self->pattern->groupindex)
2119 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002120
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002122 if (!keys)
2123 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002124
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002125 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002126 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002127 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002128 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002129 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002130 if (!key)
2131 goto failed;
2132 value = match_getslice(self, key, def);
2133 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002134 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002135 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002137 status = PyDict_SetItem(result, key, value);
2138 Py_DECREF(value);
2139 if (status < 0)
2140 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002142
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002143 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002144
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002145 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002146
2147failed:
2148 Py_DECREF(keys);
2149 Py_DECREF(result);
2150 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002151}
2152
2153static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002154match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002155{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002156 int index;
2157
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002158 PyObject* index_ = Py_False; /* zero */
2159 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2160 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002161
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002162 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002163
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002164 if (index < 0 || index >= self->groups) {
2165 PyErr_SetString(
2166 PyExc_IndexError,
2167 "no such group"
2168 );
2169 return NULL;
2170 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002171
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002172 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002173 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002174}
2175
2176static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002177match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002178{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002179 int index;
2180
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002181 PyObject* index_ = Py_False; /* zero */
2182 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2183 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002184
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002185 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002186
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002187 if (index < 0 || index >= self->groups) {
2188 PyErr_SetString(
2189 PyExc_IndexError,
2190 "no such group"
2191 );
2192 return NULL;
2193 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002194
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002195 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002196 return Py_BuildValue("i", self->mark[index*2+1]);
2197}
2198
2199LOCAL(PyObject*)
2200_pair(int i1, int i2)
2201{
2202 PyObject* pair;
2203 PyObject* item;
2204
2205 pair = PyTuple_New(2);
2206 if (!pair)
2207 return NULL;
2208
2209 item = PyInt_FromLong(i1);
2210 if (!item)
2211 goto error;
2212 PyTuple_SET_ITEM(pair, 0, item);
2213
2214 item = PyInt_FromLong(i2);
2215 if (!item)
2216 goto error;
2217 PyTuple_SET_ITEM(pair, 1, item);
2218
2219 return pair;
2220
2221 error:
2222 Py_DECREF(pair);
2223 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002224}
2225
2226static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002227match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002228{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002229 int index;
2230
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002231 PyObject* index_ = Py_False; /* zero */
2232 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2233 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002234
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002235 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002236
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002237 if (index < 0 || index >= self->groups) {
2238 PyErr_SetString(
2239 PyExc_IndexError,
2240 "no such group"
2241 );
2242 return NULL;
2243 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002244
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002245 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002246 return _pair(self->mark[index*2], self->mark[index*2+1]);
2247}
2248
2249static PyObject*
2250match_regs(MatchObject* self)
2251{
2252 PyObject* regs;
2253 PyObject* item;
2254 int index;
2255
2256 regs = PyTuple_New(self->groups);
2257 if (!regs)
2258 return NULL;
2259
2260 for (index = 0; index < self->groups; index++) {
2261 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2262 if (!item) {
2263 Py_DECREF(regs);
2264 return NULL;
2265 }
2266 PyTuple_SET_ITEM(regs, index, item);
2267 }
2268
2269 Py_INCREF(regs);
2270 self->regs = regs;
2271
2272 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002273}
2274
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002275static PyObject*
2276match_copy(MatchObject* self, PyObject* args)
2277{
2278#if USE_BUILTIN_COPY
2279 MatchObject* copy;
2280 int slots, offset;
2281
2282 /* works in progress */
2283
2284 slots = 2 * (self->pattern->groups+1);
2285
2286 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2287 if (!copy)
2288 return NULL;
2289
2290 /* this value a constant, but any compiler should be able to
2291 figure that out all by itself */
2292 offset = offsetof(MatchObject, string);
2293
2294 Py_XINCREF(self->pattern);
2295 Py_XINCREF(self->string);
2296 Py_XINCREF(self->regs);
2297
2298 memcpy((char*) copy + offset, (char*) self + offset,
2299 sizeof(MatchObject) + slots * sizeof(int) - offset);
2300
2301 return (PyObject*) copy;
2302#else
2303 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2304 return NULL;
2305#endif
2306}
2307
2308static PyObject*
2309match_deepcopy(MatchObject* self, PyObject* args)
2310{
2311 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2312 return NULL;
2313}
2314
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002315static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002316 {"group", (PyCFunction) match_group, METH_VARARGS},
2317 {"start", (PyCFunction) match_start, METH_VARARGS},
2318 {"end", (PyCFunction) match_end, METH_VARARGS},
2319 {"span", (PyCFunction) match_span, METH_VARARGS},
2320 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2321 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2322 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002323 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2324 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002325 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002326};
2327
2328static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002329match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002330{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002331 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002332
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002333 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2334 if (res)
2335 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002336
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002337 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002338
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002339 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002340 if (self->lastindex >= 0)
2341 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002342 Py_INCREF(Py_None);
2343 return Py_None;
2344 }
2345
2346 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002347 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002348 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002349 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002350 );
2351 if (result)
2352 return result;
2353 PyErr_Clear();
2354 }
2355 Py_INCREF(Py_None);
2356 return Py_None;
2357 }
2358
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002359 if (!strcmp(name, "string")) {
2360 if (self->string) {
2361 Py_INCREF(self->string);
2362 return self->string;
2363 } else {
2364 Py_INCREF(Py_None);
2365 return Py_None;
2366 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002367 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002368
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002369 if (!strcmp(name, "regs")) {
2370 if (self->regs) {
2371 Py_INCREF(self->regs);
2372 return self->regs;
2373 } else
2374 return match_regs(self);
2375 }
2376
2377 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002378 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002379 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002380 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002382 if (!strcmp(name, "pos"))
2383 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002385 if (!strcmp(name, "endpos"))
2386 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002387
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002388 PyErr_SetString(PyExc_AttributeError, name);
2389 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002390}
2391
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2394
2395statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002396 PyObject_HEAD_INIT(NULL)
2397 0, "SRE_Match",
2398 sizeof(MatchObject), sizeof(int),
2399 (destructor)match_dealloc, /*tp_dealloc*/
2400 0, /*tp_print*/
2401 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002402};
2403
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002404/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002405/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002406
2407static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002408scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002409{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002410 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002411 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002412 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002413}
2414
2415static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002416scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002417{
2418 SRE_STATE* state = &self->state;
2419 PyObject* match;
2420 int status;
2421
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002422 state_reset(state);
2423
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002424 state->ptr = state->start;
2425
2426 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002427 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002428 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002429#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002430 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002431#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002432 }
2433
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002434 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002435 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002436
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002437 if (status == 0 || state->ptr == state->start)
2438 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002439 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002440 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002441
2442 return match;
2443}
2444
2445
2446static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002447scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002448{
2449 SRE_STATE* state = &self->state;
2450 PyObject* match;
2451 int status;
2452
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002453 state_reset(state);
2454
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002455 state->ptr = state->start;
2456
2457 if (state->charsize == 1) {
2458 status = sre_search(state, PatternObject_GetCode(self->pattern));
2459 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002460#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002461 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002462#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002463 }
2464
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002465 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002466 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002467
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002468 if (status == 0 || state->ptr == state->start)
2469 state->start = (void*) ((char*) state->ptr + state->charsize);
2470 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002471 state->start = state->ptr;
2472
2473 return match;
2474}
2475
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002476static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002477 {"match", (PyCFunction) scanner_match, 0},
2478 {"search", (PyCFunction) scanner_search, 0},
2479 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002480};
2481
2482static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002483scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002484{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002485 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002486
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002487 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2488 if (res)
2489 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002490
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002491 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002492
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002493 /* attributes */
2494 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002495 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002496 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002497 }
2498
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002499 PyErr_SetString(PyExc_AttributeError, name);
2500 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002501}
2502
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002503statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002504 PyObject_HEAD_INIT(NULL)
2505 0, "SRE_Scanner",
2506 sizeof(ScannerObject), 0,
2507 (destructor)scanner_dealloc, /*tp_dealloc*/
2508 0, /*tp_print*/
2509 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002510};
2511
Guido van Rossumb700df92000-03-31 14:59:30 +00002512static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002513 {"compile", _compile, 1},
2514 {"getcodesize", sre_codesize, 1},
2515 {"getlower", sre_getlower, 1},
2516 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002517};
2518
Tim Peters5687ffe2001-02-28 16:44:18 +00002519DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002520init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002521{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002522 PyObject* m;
2523 PyObject* d;
2524
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002525 /* Patch object types */
2526 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002527 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002528
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002529 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002530 d = PyModule_GetDict(m);
2531
2532 PyDict_SetItemString(
2533 d, "MAGIC", (PyObject*) PyInt_FromLong(SRE_MAGIC)
2534 );
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002535
2536 PyDict_SetItemString(
2537 d, "copyright", (PyObject*) PyString_FromString(copyright)
2538 );
2539
Guido van Rossumb700df92000-03-31 14:59:30 +00002540}
2541
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002542#endif /* !defined(SRE_RECURSIVE) */