blob: dc6478bca2ddd27da6c63d3b21ed70b14d338010 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00007 * 99-10-24 fl created (based on existing template matcher code)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00008 * 00-03-06 fl first alpha, sort of (0.5)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00009 * 00-06-30 fl added fast search optimization (0.9.3)
10 * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
11 * 00-07-02 fl added charset optimizations, etc (0.9.5)
12 * 00-07-03 fl store code in pattern object, lookbehind, etc
13 * 00-07-08 fl added regs attribute
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000014 * 00-07-21 fl reset lastindex in scanner methods (0.9.6)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000015 * 00-08-01 fl fixes for 1.6b1 (0.9.8)
Fredrik Lundh96ab4652000-08-03 16:29:50 +000016 * 00-08-03 fl added recursion limit
Fredrik Lundh7898c3e2000-08-07 20:59:04 +000017 * 00-08-07 fl use PyOS_CheckStack() if available
Fredrik Lundh58100642000-08-09 09:14:35 +000018 * 00-08-08 fl changed findall to return empty strings instead of None
Fredrik Lundh33accc12000-08-27 20:59:47 +000019 * 00-08-27 fl properly propagate memory errors
Fredrik Lundh510c97b2000-09-02 16:36:57 +000020 * 00-09-02 fl return -1 instead of None for start/end/span
Guido van Rossumb700df92000-03-31 14:59:30 +000021 *
22 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
23 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000024 * This version of the SRE library can be redistributed under CNRI's
25 * Python 1.6 license. For any other use, please contact Secret Labs
26 * AB (info@pythonware.com).
27 *
Guido van Rossumb700df92000-03-31 14:59:30 +000028 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000029 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000030 * other compatibility work.
31 */
32
33#ifndef SRE_RECURSIVE
34
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000035char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000036
37#include "Python.h"
38
39#include "sre.h"
40
Guido van Rossumb700df92000-03-31 14:59:30 +000041#if defined(HAVE_LIMITS_H)
42#include <limits.h>
43#else
44#define INT_MAX 2147483647
45#endif
46
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000047#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000048
Fredrik Lundh436c3d582000-06-29 08:58:44 +000049/* name of this module, minus the leading underscore */
50#define MODULE "sre"
51
Guido van Rossumb700df92000-03-31 14:59:30 +000052/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000053#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000054
Fredrik Lundh436c3d582000-06-29 08:58:44 +000055#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000056/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000057#define HAVE_UNICODE
58#endif
59
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000061/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062
Fredrik Lundh33accc12000-08-27 20:59:47 +000063/* prevent run-away recursion (bad patterns on long strings) */
64
#ifndef USE_STACKCHECK
/* No stack check requested: bound the matcher's recursion depth with a
   fixed limit instead.  A number of 64-bit platforms need a smaller
   limit: Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX)
   (_LP64). */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#  if defined(_LP64) || defined(__LP64__) || defined(MS_WIN64)
#    define USE_RECURSION_LIMIT 7500
#  else
#    define USE_RECURSION_LIMIT 10000
#  endif
#endif
Fredrik Lundh96ab4652000-08-03 16:29:50 +000075
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000076/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000077#define USE_FAST_SEARCH
78
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000079/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000080#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000081
82/* -------------------------------------------------------------------- */
83
/* LOCAL(type) introduces a file-local function returning `type`; the
   exact storage/inline/calling-convention decoration depends on the
   compiler and on USE_INLINE. */
#ifdef _MSC_VER
/* doesn't seem to make much difference... */
#  pragma optimize("agtw", on)
/* who cares if functions are not inlined ;-) */
#  pragma warning(disable: 4710)
/* fastest possible local call under MSVC */
#  define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#  define LOCAL(type) static inline type
#else
#  define LOCAL(type) static type
#endif
94
/* error codes (all negative, so they can be told apart from the 0/1
   failure/success results of the matcher) */
#define SRE_ERROR_ILLEGAL           -1 /* illegal opcode */
#define SRE_ERROR_STATE             -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT   -3 /* runaway recursion */
#define SRE_ERROR_MEMORY            -9 /* out of memory */

/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise; note the doubled parentheses. */
#ifdef VERBOSE
#  define TRACE(v) printf v
#else
#  define TRACE(v)
#endif
106
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000107/* -------------------------------------------------------------------- */
108/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000109
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000110/* default character predicates (run sre_chars.py to regenerate tables) */
111
/* bit flags stored in sre_char_info[] */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* per-character flag table for the 128 ASCII codes, 16 entries per row:
   digits are 25 (digit|alnum|word), letters are 24 (alnum|word),
   '_' is 16 (word), '\n' is 6 (space|linebreak), other whitespace is 2 */
static char sre_char_info[128] = {
    /*   0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /*  16 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  32 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /*  64 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /*  96 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};
125
/* ASCII lowercase mapping, 16 entries per row: 'A'..'Z' (65..90) map to
   'a'..'z' (97..122), every other code maps to itself */
static char sre_char_lower[128] = {
    /*   0 */   0,   1,   2,   3,   4,   5,   6,   7,
                8,   9,  10,  11,  12,  13,  14,  15,
    /*  16 */  16,  17,  18,  19,  20,  21,  22,  23,
               24,  25,  26,  27,  28,  29,  30,  31,
    /*  32 */  32,  33,  34,  35,  36,  37,  38,  39,
               40,  41,  42,  43,  44,  45,  46,  47,
    /*  48 */  48,  49,  50,  51,  52,  53,  54,  55,
               56,  57,  58,  59,  60,  61,  62,  63,
    /*  64 */  64,  97,  98,  99, 100, 101, 102, 103,
              104, 105, 106, 107, 108, 109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119,
              120, 121, 122,  91,  92,  93,  94,  95,
    /*  96 */  96,  97,  98,  99, 100, 101, 102, 103,
              104, 105, 106, 107, 108, 109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119,
              120, 121, 122, 123, 124, 125, 126, 127
};

/* lowercase a character code using the ASCII table above; codes of
   128 and up are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return (unsigned int) sre_char_lower[ch];
}
140
/* Test one flag of an ASCII character: yields the masked table entry
   (nonzero if the flag is set) for codes below 128 and 0 for anything
   else.  The argument is evaluated more than once. */
#define SRE_CHAR_INFO_BITS(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

#define SRE_IS_DIGIT(ch)     SRE_CHAR_INFO_BITS((ch), SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch)     SRE_CHAR_INFO_BITS((ch), SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_INFO_BITS((ch), SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch)     SRE_CHAR_INFO_BITS((ch), SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch)      SRE_CHAR_INFO_BITS((ch), SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000151
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000152/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000153
/* lowercase a character code through the C library's tolower(); codes
   of 256 and up are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int) tolower((int) ch);
}
/* <ctype.h>-based predicates; only codes below 256 are handed to the
   C library, anything larger is reported as not matching.  The
   argument is evaluated more than once. */
#define SRE_LOC_IS_DIGIT(ch)\
    ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch)\
    ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch)\
    ((ch) < 256 ? isalnum((ch)) : 0)
/* a word character is an alphanumeric or an underscore */
#define SRE_LOC_IS_WORD(ch)\
    (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
163
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000164/* unicode-specific character predicates */
165
#if defined(HAVE_UNICODE)

/* lowercase a character code via Py_UNICODE_TOLOWER; the code is
   converted to Py_UNICODE first */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    Py_UNICODE uch = (Py_UNICODE)(ch);

    return (unsigned int) Py_UNICODE_TOLOWER(uch);
}

/* predicates forwarding to the Py_UNICODE_* classification macros */
#define SRE_UNI_IS_DIGIT(ch)\
    Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch)\
    Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch)\
    Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch)\
    Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
/* a word character is an alphanumeric or an underscore */
#define SRE_UNI_IS_WORD(ch)\
    (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

#endif
177
Guido van Rossumb700df92000-03-31 14:59:30 +0000178LOCAL(int)
179sre_category(SRE_CODE category, unsigned int ch)
180{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000181 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000182
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000183 case SRE_CATEGORY_DIGIT:
184 return SRE_IS_DIGIT(ch);
185 case SRE_CATEGORY_NOT_DIGIT:
186 return !SRE_IS_DIGIT(ch);
187 case SRE_CATEGORY_SPACE:
188 return SRE_IS_SPACE(ch);
189 case SRE_CATEGORY_NOT_SPACE:
190 return !SRE_IS_SPACE(ch);
191 case SRE_CATEGORY_WORD:
192 return SRE_IS_WORD(ch);
193 case SRE_CATEGORY_NOT_WORD:
194 return !SRE_IS_WORD(ch);
195 case SRE_CATEGORY_LINEBREAK:
196 return SRE_IS_LINEBREAK(ch);
197 case SRE_CATEGORY_NOT_LINEBREAK:
198 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000199
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000200 case SRE_CATEGORY_LOC_WORD:
201 return SRE_LOC_IS_WORD(ch);
202 case SRE_CATEGORY_LOC_NOT_WORD:
203 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000204
205#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000206 case SRE_CATEGORY_UNI_DIGIT:
207 return SRE_UNI_IS_DIGIT(ch);
208 case SRE_CATEGORY_UNI_NOT_DIGIT:
209 return !SRE_UNI_IS_DIGIT(ch);
210 case SRE_CATEGORY_UNI_SPACE:
211 return SRE_UNI_IS_SPACE(ch);
212 case SRE_CATEGORY_UNI_NOT_SPACE:
213 return !SRE_UNI_IS_SPACE(ch);
214 case SRE_CATEGORY_UNI_WORD:
215 return SRE_UNI_IS_WORD(ch);
216 case SRE_CATEGORY_UNI_NOT_WORD:
217 return !SRE_UNI_IS_WORD(ch);
218 case SRE_CATEGORY_UNI_LINEBREAK:
219 return SRE_UNI_IS_LINEBREAK(ch);
220 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
221 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000222#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000223 }
224 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000225}
226
227/* helpers */
228
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000229static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000230mark_fini(SRE_STATE* state)
231{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000232 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000233 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000234 state->mark_stack = NULL;
235 }
236 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000237}
238
239static int
240mark_save(SRE_STATE* state, int lo, int hi)
241{
242 void* stack;
243 int size;
244 int minsize, newsize;
245
246 if (hi <= lo)
247 return 0;
248
249 size = (hi - lo) + 1;
250
251 newsize = state->mark_stack_size;
252 minsize = state->mark_stack_base + size;
253
254 if (newsize < minsize) {
255 /* create new stack */
256 if (!newsize) {
257 newsize = 512;
258 if (newsize < minsize)
259 newsize = minsize;
260 TRACE(("allocate stack %d\n", newsize));
261 stack = malloc(sizeof(void*) * newsize);
262 } else {
263 /* grow the stack */
264 while (newsize < minsize)
265 newsize += newsize;
266 TRACE(("grow stack to %d\n", newsize));
267 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
268 }
269 if (!stack) {
270 mark_fini(state);
271 return SRE_ERROR_MEMORY;
272 }
273 state->mark_stack = stack;
274 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000275 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000276
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000277 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278
279 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
280 size * sizeof(void*));
281
282 state->mark_stack_base += size;
283
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000284 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000285}
286
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000287static int
288mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000289{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000290 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000291
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000292 if (hi <= lo)
293 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000294
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000295 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000296
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000297 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000298
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000299 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000300
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000301 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
302 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000303
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000304 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000305}
306
/* generate 8-bit version */

/* the engine section further down is written against these names;
   bind them to the 8-bit character type and function names first */
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_CHARSET sre_charset
#define SRE_COUNT sre_count
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

/* include this file from itself; with SRE_RECURSIVE set, everything
   guarded by #ifndef SRE_RECURSIVE is skipped, so only the engine
   section gets compiled, using the 8-bit bindings above */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

/* drop the 8-bit bindings... */
#undef SRE_CHAR
#undef SRE_AT
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_INFO
#undef SRE_MATCH
#undef SRE_SEARCH

/* generate 16-bit unicode version */

/* ...and rebind the names for the rest of this compilation pass */
#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_CHARSET sre_ucharset
#define SRE_COUNT sre_ucount
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch

#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000341
342#endif /* SRE_RECURSIVE */
343
344/* -------------------------------------------------------------------- */
345/* String matching engine */
346
347/* the following section is compiled twice, with different character
348 settings */
349
350LOCAL(int)
351SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
352{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000354
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000355 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000357 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000358
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000359 case SRE_AT_BEGINNING:
360 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000361
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000362 case SRE_AT_BEGINNING_LINE:
363 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000364 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000365
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000366 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000367 return (((void*) (ptr+1) == state->end &&
368 SRE_IS_LINEBREAK((int) ptr[0])) ||
369 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000370
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000371 case SRE_AT_END_LINE:
372 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000373 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000374
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000375 case SRE_AT_BOUNDARY:
376 if (state->beginning == state->end)
377 return 0;
378 that = ((void*) ptr > state->beginning) ?
379 SRE_IS_WORD((int) ptr[-1]) : 0;
380 this = ((void*) ptr < state->end) ?
381 SRE_IS_WORD((int) ptr[0]) : 0;
382 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000383
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000384 case SRE_AT_NON_BOUNDARY:
385 if (state->beginning == state->end)
386 return 0;
387 that = ((void*) ptr > state->beginning) ?
388 SRE_IS_WORD((int) ptr[-1]) : 0;
389 this = ((void*) ptr < state->end) ?
390 SRE_IS_WORD((int) ptr[0]) : 0;
391 return this == that;
392 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000393
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000394 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000395}
396
397LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000398SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000399{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000400 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000403
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 for (;;) {
405 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000408 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 if (ch == set[0])
410 return ok;
411 set++;
412 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000413
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000414 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000415 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000416 if (set[0] <= ch && ch <= set[1])
417 return ok;
418 set += 2;
419 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000420
Fredrik Lundh3562f112000-07-02 12:00:07 +0000421 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000422 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000423 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
424 return ok;
425 set += 16;
426 break;
427
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000428 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000429 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 if (sre_category(set[0], (int) ch))
431 return ok;
432 set += 1;
433 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000434
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000435 case SRE_OP_NEGATE:
436 ok = !ok;
437 break;
438
439 case SRE_OP_FAILURE:
440 return !ok;
441
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000442 default:
443 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000444 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000445 return 0;
446 }
447 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000448}
449
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000450LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
451
452LOCAL(int)
453SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
454{
455 SRE_CODE chr;
456 SRE_CHAR* ptr = state->ptr;
457 SRE_CHAR* end = state->end;
458 int i;
459
460 /* adjust end */
461 if (maxcount < end - ptr && maxcount != 65535)
462 end = ptr + maxcount;
463
464 switch (pattern[0]) {
465
466 case SRE_OP_ANY:
467 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000468 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000469 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
470 ptr++;
471 break;
472
473 case SRE_OP_ANY_ALL:
474 /* repeated dot wildcare. skip to the end of the target
475 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000476 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000477 ptr = end;
478 break;
479
480 case SRE_OP_LITERAL:
481 /* repeated literal */
482 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000483 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000484 while (ptr < end && (SRE_CODE) *ptr == chr)
485 ptr++;
486 break;
487
488 case SRE_OP_LITERAL_IGNORE:
489 /* repeated literal */
490 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000491 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000492 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
493 ptr++;
494 break;
495
496 case SRE_OP_NOT_LITERAL:
497 /* repeated non-literal */
498 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000499 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000500 while (ptr < end && (SRE_CODE) *ptr != chr)
501 ptr++;
502 break;
503
504 case SRE_OP_NOT_LITERAL_IGNORE:
505 /* repeated non-literal */
506 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000507 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000508 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
509 ptr++;
510 break;
511
512 case SRE_OP_IN:
513 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000514 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
515 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000516 ptr++;
517 break;
518
519 default:
520 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000521 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000522 while ((SRE_CHAR*) state->ptr < end) {
523 i = SRE_MATCH(state, pattern, level);
524 if (i < 0)
525 return i;
526 if (!i)
527 break;
528 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000529 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
530 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000531 return (SRE_CHAR*) state->ptr - ptr;
532 }
533
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000534 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000535 return ptr - (SRE_CHAR*) state->ptr;
536}
537
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* cur = state->ptr;
    SRE_CHAR* stop = state->end;
    int k;

    /* not enough characters left for the minimal length (if given) */
    if (pattern[3] && (stop - cur) < pattern[3])
        return 0;

    /* without a known prefix longer than one character, there is
       nothing more to verify */
    if (!((pattern[2] & SRE_INFO_PREFIX) && pattern[5] > 1))
        return pattern[0];

    /* <length> <skip> <prefix data> <overlap data> */
    k = 0;
    while (k < pattern[5]) {
        if ((SRE_CODE) cur[k] != pattern[7 + k])
            return 0;
        k++;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000565
566LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000567SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000568{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000569 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000570 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000571
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000572 SRE_CHAR* end = state->end;
573 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000574 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000575 SRE_REPEAT* rp;
576 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000577 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000578
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000579 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000580
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000581 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000582
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000583#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000584 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000585 return SRE_ERROR_RECURSION_LIMIT;
586#endif
587
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000588#if defined(USE_RECURSION_LIMIT)
589 if (level > USE_RECURSION_LIMIT)
590 return SRE_ERROR_RECURSION_LIMIT;
591#endif
592
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000593 if (pattern[0] == SRE_OP_INFO) {
594 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000595 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000596 if (pattern[3] && (end - ptr) < pattern[3]) {
597 TRACE(("reject (got %d chars, need %d)\n",
598 (end - ptr), pattern[3]));
599 return 0;
600 }
601 pattern += pattern[1] + 1;
602 }
603
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000604 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000607
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000608 case SRE_OP_FAILURE:
609 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000610 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000611 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000612
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 case SRE_OP_SUCCESS:
614 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000615 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000616 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000617 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000618
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000619 case SRE_OP_AT:
620 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000621 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000622 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000623 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000624 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000625 pattern++;
626 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000627
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000628 case SRE_OP_CATEGORY:
629 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000630 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000631 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000632 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000633 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000634 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000635 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000636 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000637
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000638 case SRE_OP_LITERAL:
639 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000640 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000641 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000642 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000643 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000644 pattern++;
645 ptr++;
646 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000648 case SRE_OP_NOT_LITERAL:
649 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000650 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000651 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000652 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000653 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 pattern++;
655 ptr++;
656 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000657
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000658 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000659 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000660 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000661 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000662 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
663 return 0;
664 ptr++;
665 break;
666
667 case SRE_OP_ANY_ALL:
668 /* match anything */
669 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000670 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000671 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000672 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000673 ptr++;
674 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 case SRE_OP_IN:
677 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000678 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000679 TRACE(("|%p|%p|IN\n", pattern, ptr));
680 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000681 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000682 pattern += pattern[0];
683 ptr++;
684 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000685
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000686 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000687 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000688 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 i = pattern[0];
690 {
691 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
692 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
693 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000694 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 while (p < e) {
696 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000697 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 p++; ptr++;
699 }
700 }
701 pattern++;
702 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000703
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000704 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000705 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000706 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000707 i = pattern[0];
708 {
709 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
710 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
711 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000712 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000713 while (p < e) {
714 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000715 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000716 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000717 p++; ptr++;
718 }
719 }
720 pattern++;
721 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000722
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000723 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000724 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000726 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000727 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000728 pattern++;
729 ptr++;
730 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000731
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000733 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000735 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000736 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000737 pattern++;
738 ptr++;
739 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000740
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000742 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000744 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000745 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 pattern += pattern[0];
747 ptr++;
748 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000749
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000750 case SRE_OP_MARK:
751 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000752 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000753 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000754 i = pattern[0];
755 if (i & 1)
756 state->lastindex = i/2 + 1;
757 if (i > state->lastmark)
758 state->lastmark = i;
759 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000760 pattern++;
761 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000762
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000763 case SRE_OP_JUMP:
764 case SRE_OP_INFO:
765 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000766 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000767 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 pattern += pattern[0];
769 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000770
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000771 case SRE_OP_ASSERT:
772 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000773 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000774 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000775 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000776 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000777 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000778 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000779 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000780 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000781 if (pattern[1] > 0 && state->ptr != ptr)
782 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000783 pattern += pattern[0];
784 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000785
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000786 case SRE_OP_ASSERT_NOT:
787 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000788 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000789 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000791 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000792 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000793 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000794 if (i < 0)
795 return i;
796 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000797 return 0;
798 if (pattern[1] > 0 && state->ptr != ptr)
799 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 pattern += pattern[0];
801 break;
802
803 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000804 /* alternation */
805 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000806 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000807 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000808 for (; pattern[0]; pattern += pattern[0]) {
809 if (pattern[1] == SRE_OP_LITERAL &&
810 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
811 continue;
812 if (pattern[1] == SRE_OP_IN &&
813 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
814 continue;
815 state->ptr = ptr;
816 i = SRE_MATCH(state, pattern + 1, level + 1);
817 if (i)
818 return i;
819 if (state->lastmark > lastmark) {
820 memset(
821 state->mark + lastmark + 1, 0,
822 (state->lastmark - lastmark) * sizeof(void*)
823 );
824 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000825 }
826 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000827 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000828
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000829 case SRE_OP_REPEAT_ONE:
830 /* match repeated sequence (maximizing regexp) */
831
832 /* this operator only works if the repeated item is
833 exactly one character wide, and we're not already
834 collecting backtracking points. for other cases,
835 use the MAX_REPEAT operator instead */
836
837 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
838
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000839 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000840 pattern[1], pattern[2]));
841
Fredrik Lundhe1869832000-08-01 22:47:49 +0000842 if (ptr + pattern[1] > end)
843 return 0; /* cannot match */
844
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000845 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000846
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000847 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
848 if (count < 0)
849 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000850
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000851 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000852
853 /* when we arrive here, count contains the number of
854 matches, and ptr points to the tail of the target
855 string. check if the rest of the pattern matches,
856 and backtrack if not. */
857
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000858 if (count < (int) pattern[1])
859 return 0;
860
861 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
862 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000863 state->ptr = ptr;
864 return 1;
865
866 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
867 /* tail starts with a literal. skip positions where
868 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000869 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000870 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000871 while (count >= (int) pattern[1] &&
872 (ptr >= end || *ptr != chr)) {
873 ptr--;
874 count--;
875 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000876 if (count < (int) pattern[1])
877 break;
878 state->ptr = ptr;
879 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000880 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000881 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000882 ptr--;
883 count--;
884 }
885
886 } else {
887 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000888 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000889 while (count >= (int) pattern[1]) {
890 state->ptr = ptr;
891 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000892 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000893 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000894 ptr--;
895 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000896 if (state->lastmark > lastmark) {
897 memset(
898 state->mark + lastmark + 1, 0,
899 (state->lastmark - lastmark) * sizeof(void*)
900 );
901 state->lastmark = lastmark;
902 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000903 }
904 }
905 return 0;
906
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000907 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000908 /* create repeat context. all the hard work is done
909 by the UNTIL operator */
910 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000911 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000912 pattern[1], pattern[2]));
913
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000914 rep.count = -1;
915 rep.pattern = pattern;
916
917 /* install new repeat context */
918 rep.prev = state->repeat;
919 state->repeat = &rep;
920
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000921 state->ptr = ptr;
922 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000923
924 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000925
926 return i;
927
928 case SRE_OP_MAX_UNTIL:
929 /* maximizing repeat */
930 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
931
932 /* FIXME: we probably need to deal with zero-width
933 matches in here... */
934
935 rp = state->repeat;
936 if (!rp)
937 return SRE_ERROR_STATE;
938
939 state->ptr = ptr;
940
941 count = rp->count + 1;
942
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000943 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000944
945 if (count < rp->pattern[1]) {
946 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000947 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000948 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000949 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000950 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000951 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000952 rp->count = count - 1;
953 state->ptr = ptr;
954 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000955 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000956
957 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000958 /* we may have enough matches, but if we can
959 match another item, do so */
960 rp->count = count;
961 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000962 i = mark_save(state, 0, lastmark);
963 if (i < 0)
964 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000965 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000966 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000967 if (i)
968 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000969 i = mark_restore(state, 0, lastmark);
970 if (i < 0)
971 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000972 rp->count = count - 1;
973 state->ptr = ptr;
974 }
975
976 /* cannot match more repeated items here. make sure the
977 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000978 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000979 i = SRE_MATCH(state, pattern, level + 1);
980 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000981 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000982 state->repeat = rp;
983 return 0;
984
985 case SRE_OP_MIN_UNTIL:
986 /* minimizing repeat */
987 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
988
989 rp = state->repeat;
990 if (!rp)
991 return SRE_ERROR_STATE;
992
993 count = rp->count + 1;
994
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000995 TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000996
997 state->ptr = ptr;
998
999 if (count < rp->pattern[1]) {
1000 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001001 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001002 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001003 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001004 if (i)
1005 return i;
1006 rp->count = count-1;
1007 state->ptr = ptr;
1008 return 0;
1009 }
1010
1011 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001012 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001013 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001014 if (i) {
1015 /* free(rp); */
1016 return i;
1017 }
1018 state->repeat = rp;
1019
1020 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1021 return 0;
1022
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001023 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001024 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001025 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001026 if (i)
1027 return i;
1028 rp->count = count - 1;
1029 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001030
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001031 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001032 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001033 return SRE_ERROR_ILLEGAL;
1034 }
1035 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001036
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001037 /* shouldn't end up here */
1038 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001039}
1040
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001041LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001042SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1043{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001044 SRE_CHAR* ptr = state->start;
1045 SRE_CHAR* end = state->end;
1046 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001047 int prefix_len = 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001048 int prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001049 SRE_CODE* prefix = NULL;
1050 SRE_CODE* charset = NULL;
1051 SRE_CODE* overlap = NULL;
1052 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001053
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001054 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001055 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001056 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001057
1058 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001059
1060 if (pattern[3] > 0) {
1061 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001062 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001063 end -= pattern[3]-1;
1064 if (end <= ptr)
1065 end = ptr+1;
1066 }
1067
Fredrik Lundh3562f112000-07-02 12:00:07 +00001068 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001069 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001070 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001071 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001072 prefix_skip = pattern[6];
1073 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001074 overlap = prefix + prefix_len - 1;
1075 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001076 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001077 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001078 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001079
1080 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001081 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001082
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001083 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1084 TRACE(("charset = %p\n", charset));
1085
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001086#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001087 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001088 /* pattern starts with a known prefix. use the overlap
1089 table to skip forward as fast as we possibly can */
1090 int i = 0;
1091 end = state->end;
1092 while (ptr < end) {
1093 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001094 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001095 if (!i)
1096 break;
1097 else
1098 i = overlap[i];
1099 } else {
1100 if (++i == prefix_len) {
1101 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001102 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1103 state->start = ptr + 1 - prefix_len;
1104 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001105 if (flags & SRE_INFO_LITERAL)
1106 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001107 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001108 if (status != 0)
1109 return status;
1110 /* close but no cigar -- try again */
1111 i = overlap[i];
1112 }
1113 break;
1114 }
1115
1116 }
1117 ptr++;
1118 }
1119 return 0;
1120 }
1121#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001122
Fredrik Lundh3562f112000-07-02 12:00:07 +00001123 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001124 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001125 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001126 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001127 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001128 for (;;) {
1129 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1130 ptr++;
1131 if (ptr == end)
1132 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001133 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001134 state->start = ptr;
1135 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001136 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001137 if (status != 0)
1138 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001139 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001140 } else if (charset) {
1141 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001142 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001143 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001144 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001145 ptr++;
1146 if (ptr == end)
1147 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001148 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001149 state->start = ptr;
1150 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001151 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001152 if (status != 0)
1153 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001154 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001155 }
1156 } else
1157 /* general case */
1158 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001159 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001160 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001161 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001162 if (status != 0)
1163 break;
1164 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001165
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001166 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001167}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001168
Guido van Rossumb700df92000-03-31 14:59:30 +00001169
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001170#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001171
1172/* -------------------------------------------------------------------- */
1173/* factories and destructors */
1174
1175/* see sre.h for object declarations */
1176
1177staticforward PyTypeObject Pattern_Type;
1178staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001179staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001180
1181static PyObject *
1182_compile(PyObject* self_, PyObject* args)
1183{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001184 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001185
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001186 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001187 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001188
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001189 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001190 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001191 PyObject* code;
1192 int groups = 0;
1193 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001194 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001195 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001196 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001197 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001198
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001199 code = PySequence_Fast(code, "code argument must be a sequence");
1200 if (!code)
1201 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001202
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001203#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001204 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001205#else
1206 n = PySequence_Length(code);
1207#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001208
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001209 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1210 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001211 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001212 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001213 }
1214
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001215 for (i = 0; i < n; i++) {
1216 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001217 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001218 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001219
1220 Py_DECREF(code);
1221
1222 if (PyErr_Occurred())
1223 return NULL;
1224
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001225 Py_INCREF(pattern);
1226 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001227
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001228 self->flags = flags;
1229
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001230 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001232 Py_XINCREF(groupindex);
1233 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001234
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001235 Py_XINCREF(indexgroup);
1236 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001237
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001238 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001239}
1240
1241static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001242sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001243{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001244 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001245}
1246
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001247static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001248sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001249{
1250 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001251 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001252 return NULL;
1253 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001254 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001255#if defined(HAVE_UNICODE)
1256 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001257 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001258#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001259 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001260}
1261
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001262LOCAL(void)
1263state_reset(SRE_STATE* state)
1264{
1265 int i;
1266
1267 state->lastmark = 0;
1268
1269 /* FIXME: dynamic! */
1270 for (i = 0; i < SRE_MARK_SIZE; i++)
1271 state->mark[i] = NULL;
1272
1273 state->lastindex = -1;
1274
1275 state->repeat = NULL;
1276
1277 mark_fini(state);
1278}
1279
Guido van Rossumb700df92000-03-31 14:59:30 +00001280LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001281state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1282 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001283{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001284 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001285
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001286 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001287 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001288 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001289
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001290 memset(state, 0, sizeof(SRE_STATE));
1291
1292 state->lastindex = -1;
1293
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001294 /* get pointer to string buffer */
1295 buffer = string->ob_type->tp_as_buffer;
1296 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1297 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001298 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001299 return NULL;
1300 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001301
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001302 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001303 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1304 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001305 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1306 return NULL;
1307 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001308
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001309 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001310
1311#if PY_VERSION_HEX >= 0x01060000
1312 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001313#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001314 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001315#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001316
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001317 if (PyString_Check(string) || bytes == size)
1318 state->charsize = 1;
1319#if defined(HAVE_UNICODE)
1320 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1321 state->charsize = sizeof(Py_UNICODE);
1322#endif
1323 else {
1324 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1325 return NULL;
1326 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001327
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001328 /* adjust boundaries */
1329 if (start < 0)
1330 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001331 else if (start > size)
1332 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001333
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001334 if (end < 0)
1335 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001336 else if (end > size)
1337 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001338
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001339 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001340
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001341 state->start = (void*) ((char*) ptr + start * state->charsize);
1342 state->end = (void*) ((char*) ptr + end * state->charsize);
1343
1344 Py_INCREF(string);
1345 state->string = string;
1346 state->pos = start;
1347 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001348
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001349 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001350 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001351#if defined(HAVE_UNICODE)
1352 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001353 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001354#endif
1355 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001356 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001357
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001358 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001359}
1360
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001361LOCAL(void)
1362state_fini(SRE_STATE* state)
1363{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001364 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001365 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001366}
1367
1368LOCAL(PyObject*)
1369state_getslice(SRE_STATE* state, int index, PyObject* string)
1370{
Fredrik Lundh58100642000-08-09 09:14:35 +00001371 int i, j;
1372
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001373 index = (index - 1) * 2;
1374
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001375 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001376 i = j = 0;
1377 } else {
1378 i = ((char*)state->mark[index] - (char*)state->beginning) /
1379 state->charsize;
1380 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1381 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001382 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001383
Fredrik Lundh58100642000-08-09 09:14:35 +00001384 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001385}
1386
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001387static void
1388pattern_error(int status)
1389{
1390 switch (status) {
1391 case SRE_ERROR_RECURSION_LIMIT:
1392 PyErr_SetString(
1393 PyExc_RuntimeError,
1394 "maximum recursion limit exceeded"
1395 );
1396 break;
1397 case SRE_ERROR_MEMORY:
1398 PyErr_NoMemory();
1399 break;
1400 default:
1401 /* other error codes indicate compiler/engine bugs */
1402 PyErr_SetString(
1403 PyExc_RuntimeError,
1404 "internal error in regular expression engine"
1405 );
1406 }
1407}
1408
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001409static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001410pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001411{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001412 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001413
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001414 MatchObject* match;
1415 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001416 char* base;
1417 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001418
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001419 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001420
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001421 /* create match object (with room for extra group marks) */
1422 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001423 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001424 if (!match)
1425 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001427 Py_INCREF(pattern);
1428 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001430 Py_INCREF(state->string);
1431 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001433 match->regs = NULL;
1434 match->groups = pattern->groups+1;
1435
1436 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001437
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001438 base = (char*) state->beginning;
1439 n = state->charsize;
1440
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001441 match->mark[0] = ((char*) state->start - base) / n;
1442 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001443
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001444 for (i = j = 0; i < pattern->groups; i++, j+=2)
1445 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1446 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1447 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1448 } else
1449 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1450
1451 match->pos = state->pos;
1452 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001453
Fredrik Lundh6f013982000-07-03 18:44:21 +00001454 match->lastindex = state->lastindex;
1455
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001456 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001457
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001458 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001459
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001460 /* no match */
1461 Py_INCREF(Py_None);
1462 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001463
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001464 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001465
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001466 /* internal error */
1467 pattern_error(status);
1468 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001469}
1470
1471static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001472pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001473{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001474 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001475
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001476 ScannerObject* self;
1477
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001478 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001479 int start = 0;
1480 int end = INT_MAX;
1481 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1482 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001483
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001484 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001485 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001486 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001487 return NULL;
1488
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001489 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001490 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001491 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001492 return NULL;
1493 }
1494
1495 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001496 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001497
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001498 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001499}
1500
Guido van Rossumb700df92000-03-31 14:59:30 +00001501static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001502pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001503{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001504 Py_XDECREF(self->pattern);
1505 Py_XDECREF(self->groupindex);
1506 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001507}
1508
1509static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001510pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001511{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001512 SRE_STATE state;
1513 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001514
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001515 PyObject* string;
1516 int start = 0;
1517 int end = INT_MAX;
1518 if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
1519 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001520
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001521 string = state_init(&state, self, string, start, end);
1522 if (!string)
1523 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001524
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001525 state.ptr = state.start;
1526
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001527 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1528
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001529 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001530 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001531 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001532#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001533 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001534#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001535 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001536
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001537 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1538
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001539 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001540
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001541 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001542}
1543
1544static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001545pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001546{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001547 SRE_STATE state;
1548 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001549
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001550 PyObject* string;
1551 int start = 0;
1552 int end = INT_MAX;
1553 if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
1554 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001555
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001556 string = state_init(&state, self, string, start, end);
1557 if (!string)
1558 return NULL;
1559
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001560 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1561
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001562 if (state.charsize == 1) {
1563 status = sre_search(&state, PatternObject_GetCode(self));
1564 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001565#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001566 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001567#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001568 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001569
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001570 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1571
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001573
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001574 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001575}
1576
1577static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001578call(char* function, PyObject* args)
1579{
1580 PyObject* name;
1581 PyObject* module;
1582 PyObject* func;
1583 PyObject* result;
1584
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001585 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001586 if (!name)
1587 return NULL;
1588 module = PyImport_Import(name);
1589 Py_DECREF(name);
1590 if (!module)
1591 return NULL;
1592 func = PyObject_GetAttrString(module, function);
1593 Py_DECREF(module);
1594 if (!func)
1595 return NULL;
1596 result = PyObject_CallObject(func, args);
1597 Py_DECREF(func);
1598 Py_DECREF(args);
1599 return result;
1600}
1601
1602static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001603pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001604{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001605 PyObject* template;
1606 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001607 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001608 if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
1609 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001610
1611 /* delegate to Python code */
1612 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1613}
1614
1615static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001616pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001617{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001618 PyObject* template;
1619 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001620 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001621 if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
1622 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001623
1624 /* delegate to Python code */
1625 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1626}
1627
1628static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001629pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001630{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001631 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001632 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001633 if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
1634 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001635
1636 /* delegate to Python code */
1637 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1638}
1639
1640static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001641pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001642{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001643 SRE_STATE state;
1644 PyObject* list;
1645 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001646 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001648 PyObject* string;
1649 int start = 0;
1650 int end = INT_MAX;
1651 if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
1652 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001653
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001654 string = state_init(&state, self, string, start, end);
1655 if (!string)
1656 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001657
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001658 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001659
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001660 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001661
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001662 PyObject* item;
1663
1664 state.ptr = state.start;
1665
1666 if (state.charsize == 1) {
1667 status = sre_search(&state, PatternObject_GetCode(self));
1668 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001669#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001670 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001671#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001672 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001673
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001674 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001675
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001676 /* don't bother to build a match object */
1677 switch (self->groups) {
1678 case 0:
1679 item = PySequence_GetSlice(
1680 string,
1681 ((char*) state.start - (char*) state.beginning) /
1682 state.charsize,
1683 ((char*) state.ptr - (char*) state.beginning) /
1684 state.charsize);
1685 if (!item)
1686 goto error;
1687 break;
1688 case 1:
1689 item = state_getslice(&state, 1, string);
1690 if (!item)
1691 goto error;
1692 break;
1693 default:
1694 item = PyTuple_New(self->groups);
1695 if (!item)
1696 goto error;
1697 for (i = 0; i < self->groups; i++) {
1698 PyObject* o = state_getslice(&state, i+1, string);
1699 if (!o) {
1700 Py_DECREF(item);
1701 goto error;
1702 }
1703 PyTuple_SET_ITEM(item, i, o);
1704 }
1705 break;
1706 }
1707
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001708 status = PyList_Append(list, item);
1709 Py_DECREF(item);
1710
1711 if (status < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001712 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001713
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001714 if (state.ptr == state.start)
1715 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001716 else
1717 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001718
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001719 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001720
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001721 if (status == 0)
1722 break;
1723
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001724 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001725 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001727 }
1728 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001729
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001730 state_fini(&state);
1731 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001732
1733error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001734 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001735 state_fini(&state);
1736 return NULL;
1737
Guido van Rossumb700df92000-03-31 14:59:30 +00001738}
1739
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001740static PyMethodDef pattern_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001741 {"match", (PyCFunction) pattern_match, 1},
1742 {"search", (PyCFunction) pattern_search, 1},
1743 {"sub", (PyCFunction) pattern_sub, 1},
1744 {"subn", (PyCFunction) pattern_subn, 1},
1745 {"split", (PyCFunction) pattern_split, 1},
1746 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001747 /* experimental */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001748 {"scanner", (PyCFunction) pattern_scanner, 1},
1749 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001750};
1751
1752static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001753pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001754{
1755 PyObject* res;
1756
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001757 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001759 if (res)
1760 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001761
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001762 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001763
1764 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001765 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001766 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001767 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001768 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001769
1770 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001771 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001772
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001773 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001774 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001775
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001776 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001777 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001778 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001779 }
1780
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001781 PyErr_SetString(PyExc_AttributeError, name);
1782 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001783}
1784
1785statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001786 PyObject_HEAD_INIT(NULL)
1787 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001788 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001789 (destructor)pattern_dealloc, /*tp_dealloc*/
1790 0, /*tp_print*/
1791 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001792};
1793
1794/* -------------------------------------------------------------------- */
1795/* match methods */
1796
1797static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001798match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001799{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001800 Py_XDECREF(self->regs);
1801 Py_XDECREF(self->string);
1802 Py_DECREF(self->pattern);
1803 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001804}
1805
1806static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001807match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001808{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001809 if (index < 0 || index >= self->groups) {
1810 /* raise IndexError if we were given a bad group number */
1811 PyErr_SetString(
1812 PyExc_IndexError,
1813 "no such group"
1814 );
1815 return NULL;
1816 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001817
Fredrik Lundh6f013982000-07-03 18:44:21 +00001818 index *= 2;
1819
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001820 if (self->string == Py_None || self->mark[index] < 0) {
1821 /* return default value if the string or group is undefined */
1822 Py_INCREF(def);
1823 return def;
1824 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001825
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001826 return PySequence_GetSlice(
1827 self->string, self->mark[index], self->mark[index+1]
1828 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001829}
1830
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001831static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001832match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001833{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001834 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001835
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001836 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001837 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001838
Fredrik Lundh6f013982000-07-03 18:44:21 +00001839 i = -1;
1840
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001841 if (self->pattern->groupindex) {
1842 index = PyObject_GetItem(self->pattern->groupindex, index);
1843 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001844 if (PyInt_Check(index))
1845 i = (int) PyInt_AS_LONG(index);
1846 Py_DECREF(index);
1847 } else
1848 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001849 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001850
1851 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001852}
1853
1854static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001855match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001856{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001857 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001858}
1859
1860static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001861match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001862{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001863 PyObject* result;
1864 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001865
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001866 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001867
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001868 switch (size) {
1869 case 0:
1870 result = match_getslice(self, Py_False, Py_None);
1871 break;
1872 case 1:
1873 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1874 break;
1875 default:
1876 /* fetch multiple items */
1877 result = PyTuple_New(size);
1878 if (!result)
1879 return NULL;
1880 for (i = 0; i < size; i++) {
1881 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001882 self, PyTuple_GET_ITEM(args, i), Py_None
1883 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001884 if (!item) {
1885 Py_DECREF(result);
1886 return NULL;
1887 }
1888 PyTuple_SET_ITEM(result, i, item);
1889 }
1890 break;
1891 }
1892 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001893}
1894
1895static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001896match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001897{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001898 PyObject* result;
1899 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001900
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001901 PyObject* def = Py_None;
1902 if (!PyArg_ParseTuple(args, "|O:groups", &def))
1903 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001904
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001905 result = PyTuple_New(self->groups-1);
1906 if (!result)
1907 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001908
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001909 for (index = 1; index < self->groups; index++) {
1910 PyObject* item;
1911 item = match_getslice_by_index(self, index, def);
1912 if (!item) {
1913 Py_DECREF(result);
1914 return NULL;
1915 }
1916 PyTuple_SET_ITEM(result, index-1, item);
1917 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001918
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001919 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001920}
1921
1922static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001923match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001924{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001925 PyObject* result;
1926 PyObject* keys;
1927 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001928
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001929 PyObject* def = Py_None;
1930 if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
1931 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001932
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001933 result = PyDict_New();
1934 if (!result || !self->pattern->groupindex)
1935 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001936
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001937 keys = PyMapping_Keys(self->pattern->groupindex);
1938 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001939 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001940 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001941 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001942
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001943 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1944 PyObject* key;
1945 PyObject* item;
1946 key = PyList_GET_ITEM(keys, index);
1947 if (!key) {
1948 Py_DECREF(keys);
1949 Py_DECREF(result);
1950 return NULL;
1951 }
1952 item = match_getslice(self, key, def);
1953 if (!item) {
1954 Py_DECREF(key);
1955 Py_DECREF(keys);
1956 Py_DECREF(result);
1957 return NULL;
1958 }
1959 /* FIXME: <fl> this can fail, right? */
1960 PyDict_SetItem(result, key, item);
1961 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001962
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001963 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001964
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001965 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001966}
1967
1968static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001969match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001970{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001971 int index;
1972
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001973 PyObject* index_ = Py_False; /* zero */
1974 if (!PyArg_ParseTuple(args, "|O:start", &index_))
1975 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001976
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001977 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001978
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001979 if (index < 0 || index >= self->groups) {
1980 PyErr_SetString(
1981 PyExc_IndexError,
1982 "no such group"
1983 );
1984 return NULL;
1985 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001986
Fredrik Lundh510c97b2000-09-02 16:36:57 +00001987 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001988 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00001989}
1990
1991static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001992match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001993{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001994 int index;
1995
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001996 PyObject* index_ = Py_False; /* zero */
1997 if (!PyArg_ParseTuple(args, "|O:end", &index_))
1998 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001999
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002000 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002001
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002002 if (index < 0 || index >= self->groups) {
2003 PyErr_SetString(
2004 PyExc_IndexError,
2005 "no such group"
2006 );
2007 return NULL;
2008 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002009
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002010 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002011 return Py_BuildValue("i", self->mark[index*2+1]);
2012}
2013
2014LOCAL(PyObject*)
2015_pair(int i1, int i2)
2016{
2017 PyObject* pair;
2018 PyObject* item;
2019
2020 pair = PyTuple_New(2);
2021 if (!pair)
2022 return NULL;
2023
2024 item = PyInt_FromLong(i1);
2025 if (!item)
2026 goto error;
2027 PyTuple_SET_ITEM(pair, 0, item);
2028
2029 item = PyInt_FromLong(i2);
2030 if (!item)
2031 goto error;
2032 PyTuple_SET_ITEM(pair, 1, item);
2033
2034 return pair;
2035
2036 error:
2037 Py_DECREF(pair);
2038 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002039}
2040
2041static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002042match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002043{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002044 int index;
2045
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002046 PyObject* index_ = Py_False; /* zero */
2047 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2048 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002049
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002050 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002051
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002052 if (index < 0 || index >= self->groups) {
2053 PyErr_SetString(
2054 PyExc_IndexError,
2055 "no such group"
2056 );
2057 return NULL;
2058 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002059
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002060 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002061 return _pair(self->mark[index*2], self->mark[index*2+1]);
2062}
2063
2064static PyObject*
2065match_regs(MatchObject* self)
2066{
2067 PyObject* regs;
2068 PyObject* item;
2069 int index;
2070
2071 regs = PyTuple_New(self->groups);
2072 if (!regs)
2073 return NULL;
2074
2075 for (index = 0; index < self->groups; index++) {
2076 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2077 if (!item) {
2078 Py_DECREF(regs);
2079 return NULL;
2080 }
2081 PyTuple_SET_ITEM(regs, index, item);
2082 }
2083
2084 Py_INCREF(regs);
2085 self->regs = regs;
2086
2087 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002088}
2089
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002090static PyMethodDef match_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002091 {"group", (PyCFunction) match_group, 1},
2092 {"start", (PyCFunction) match_start, 1},
2093 {"end", (PyCFunction) match_end, 1},
2094 {"span", (PyCFunction) match_span, 1},
2095 {"groups", (PyCFunction) match_groups, 1},
2096 {"groupdict", (PyCFunction) match_groupdict, 1},
2097 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002098};
2099
2100static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002101match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002102{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002103 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002104
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002105 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2106 if (res)
2107 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002108
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002109 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002110
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002111 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002112 if (self->lastindex >= 0)
2113 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002114 Py_INCREF(Py_None);
2115 return Py_None;
2116 }
2117
2118 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002119 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002120 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002121 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002122 );
2123 if (result)
2124 return result;
2125 PyErr_Clear();
2126 }
2127 Py_INCREF(Py_None);
2128 return Py_None;
2129 }
2130
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002131 if (!strcmp(name, "string")) {
2132 if (self->string) {
2133 Py_INCREF(self->string);
2134 return self->string;
2135 } else {
2136 Py_INCREF(Py_None);
2137 return Py_None;
2138 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002139 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002140
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 if (!strcmp(name, "regs")) {
2142 if (self->regs) {
2143 Py_INCREF(self->regs);
2144 return self->regs;
2145 } else
2146 return match_regs(self);
2147 }
2148
2149 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002150 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002151 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002152 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002153
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002154 if (!strcmp(name, "pos"))
2155 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002157 if (!strcmp(name, "endpos"))
2158 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002160 PyErr_SetString(PyExc_AttributeError, name);
2161 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002162}
2163
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2166
2167statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002168 PyObject_HEAD_INIT(NULL)
2169 0, "SRE_Match",
2170 sizeof(MatchObject), sizeof(int),
2171 (destructor)match_dealloc, /*tp_dealloc*/
2172 0, /*tp_print*/
2173 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002174};
2175
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002176/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002177/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002178
2179static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002180scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002181{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002182 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002183 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002184 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002185}
2186
2187static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002188scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002189{
2190 SRE_STATE* state = &self->state;
2191 PyObject* match;
2192 int status;
2193
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002194 state_reset(state);
2195
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002196 state->ptr = state->start;
2197
2198 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002199 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002200 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002201#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002202 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002203#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002204 }
2205
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002206 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002208
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002209 if (status == 0 || state->ptr == state->start)
2210 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002211 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002212 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002213
2214 return match;
2215}
2216
2217
2218static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002219scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002220{
2221 SRE_STATE* state = &self->state;
2222 PyObject* match;
2223 int status;
2224
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002225 state_reset(state);
2226
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002227 state->ptr = state->start;
2228
2229 if (state->charsize == 1) {
2230 status = sre_search(state, PatternObject_GetCode(self->pattern));
2231 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002232#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002233 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002234#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002235 }
2236
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002237 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002238 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002239
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002240 if (status == 0 || state->ptr == state->start)
2241 state->start = (void*) ((char*) state->ptr + state->charsize);
2242 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002243 state->start = state->ptr;
2244
2245 return match;
2246}
2247
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002248static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002249 {"match", (PyCFunction) scanner_match, 0},
2250 {"search", (PyCFunction) scanner_search, 0},
2251 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002252};
2253
2254static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002255scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002256{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002257 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002258
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002259 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2260 if (res)
2261 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002262
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002263 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002264
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002265 /* attributes */
2266 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002267 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002268 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002269 }
2270
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002271 PyErr_SetString(PyExc_AttributeError, name);
2272 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002273}
2274
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002275statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002276 PyObject_HEAD_INIT(NULL)
2277 0, "SRE_Scanner",
2278 sizeof(ScannerObject), 0,
2279 (destructor)scanner_dealloc, /*tp_dealloc*/
2280 0, /*tp_print*/
2281 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002282};
2283
Guido van Rossumb700df92000-03-31 14:59:30 +00002284static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002285 {"compile", _compile, 1},
2286 {"getcodesize", sre_codesize, 1},
2287 {"getlower", sre_getlower, 1},
2288 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002289};
2290
2291void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002292#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002293__declspec(dllexport)
2294#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002295init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002296{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002297 /* Patch object types */
2298 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002299 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002300
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002301 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002302}
2303
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002304#endif /* !defined(SRE_RECURSIVE) */