blob: 3b78fb97e8030d6eb70f64231061ecc60ae35fb0 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 99-10-24 fl  created (based on existing template matcher code)
 * 00-03-06 fl  first alpha, sort of (0.5)
 * 00-06-30 fl  added fast search optimization (0.9.3)
 * 00-06-30 fl  added assert (lookahead) primitives, etc (0.9.4)
 * 00-07-02 fl  added charset optimizations, etc (0.9.5)
 * 00-07-03 fl  store code in pattern object, lookbehind, etc
 * 00-07-08 fl  added regs attribute
 * 00-07-21 fl  reset lastindex in scanner methods (0.9.6)
 * 00-08-01 fl  fixes for 1.6b1 (0.9.8)
 * 00-08-03 fl  added recursion limit
 * 00-08-07 fl  use PyOS_CheckStack() if available
 * 00-08-08 fl  changed findall to return empty strings instead of None
 *
 * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
30
31#ifndef SRE_RECURSIVE
32
/* SRE version and copyright banner */
char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000034
35#include "Python.h"
36
37#include "sre.h"
38
Guido van Rossumb700df92000-03-31 14:59:30 +000039#if defined(HAVE_LIMITS_H)
40#include <limits.h>
41#else
42#define INT_MAX 2147483647
43#endif
44
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000045#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000046
/* name of this module, minus the leading underscore */
#define MODULE "sre"

/* defining this one enables tracing (see the TRACE macro) */
#undef VERBOSE

#if PY_VERSION_HEX >= 0x01060000
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif
57
/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings)

   Require a smaller recursion limit for a number of 64-bit platforms
   to prevent stack overflow:
       Win64 - MS_WIN64, Linux64 - __LP64__, Monterey (64-bit AIX) - _LP64
   XXX Or maybe this should be defined for all SIZEOF_VOIDP>4 platforms?

   The fixed limit is only configured when USE_STACKCHECK is not defined.
*/
#if !defined(USE_STACKCHECK)
#  if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
#    define USE_RECURSION_LIMIT 7500
#  else
#    define USE_RECURSION_LIMIT 10000
#  endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* -------------------------------------------------------------------- */
82
/* LOCAL(type): storage class / calling convention prefix used for the
   engine's file-local helper functions */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
93
/* error codes (all negative; the matcher returns 0 for "no match" and
   1 for "match") */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */
99
/* TRACE((fmt, args...)): printf-style tracing.  The argument is a
   parenthesized printf argument list; when VERBOSE is not defined the
   macro expands to nothing, so the arguments are never evaluated. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
105
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000106/* -------------------------------------------------------------------- */
107/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000108
/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored in sre_char_info[] */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* per-character flag set for codes 0..127, sixteen entries per row */
static const char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
124
/* lower-case mapping for codes 0..127: 'A'..'Z' (65..90) map to
   'a'..'z' (97..122), every other code maps to itself */
static const char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* Lower-case a character code using the table above.  Codes >= 128 are
   returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch < 128)
        return (unsigned int) (unsigned char) sre_char_lower[ch];
    return ch;
}
139
/* Table-driven predicates.  Each yields the (non-zero) mask bit from
   sre_char_info[] when the flag is set for ch, and 0 otherwise; codes
   >= 128 never match.  Note that ch is evaluated more than once. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000150
/* locale-specific character predicates */

/* Lower-case a character code with the C library's tolower().  Codes
   >= 256 are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int) tolower((int) ch);
    return ch;
}
/* <ctype.h>-based predicates; codes >= 256 never match.  The line
   break test is a plain comparison against '\n'.  ch is evaluated more
   than once. */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
162
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000163/* unicode-specific character predicates */
164
165#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000166static unsigned int sre_lower_unicode(unsigned int ch)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000167{
168 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
169}
/* predicates delegating to the Py_UNICODE_IS* macros; a word character
   is anything alphanumeric or an underscore */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000175#endif
176
Guido van Rossumb700df92000-03-31 14:59:30 +0000177LOCAL(int)
178sre_category(SRE_CODE category, unsigned int ch)
179{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000180 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_DIGIT:
183 return SRE_IS_DIGIT(ch);
184 case SRE_CATEGORY_NOT_DIGIT:
185 return !SRE_IS_DIGIT(ch);
186 case SRE_CATEGORY_SPACE:
187 return SRE_IS_SPACE(ch);
188 case SRE_CATEGORY_NOT_SPACE:
189 return !SRE_IS_SPACE(ch);
190 case SRE_CATEGORY_WORD:
191 return SRE_IS_WORD(ch);
192 case SRE_CATEGORY_NOT_WORD:
193 return !SRE_IS_WORD(ch);
194 case SRE_CATEGORY_LINEBREAK:
195 return SRE_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_NOT_LINEBREAK:
197 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000198
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000199 case SRE_CATEGORY_LOC_WORD:
200 return SRE_LOC_IS_WORD(ch);
201 case SRE_CATEGORY_LOC_NOT_WORD:
202 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000203
204#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000205 case SRE_CATEGORY_UNI_DIGIT:
206 return SRE_UNI_IS_DIGIT(ch);
207 case SRE_CATEGORY_UNI_NOT_DIGIT:
208 return !SRE_UNI_IS_DIGIT(ch);
209 case SRE_CATEGORY_UNI_SPACE:
210 return SRE_UNI_IS_SPACE(ch);
211 case SRE_CATEGORY_UNI_NOT_SPACE:
212 return !SRE_UNI_IS_SPACE(ch);
213 case SRE_CATEGORY_UNI_WORD:
214 return SRE_UNI_IS_WORD(ch);
215 case SRE_CATEGORY_UNI_NOT_WORD:
216 return !SRE_UNI_IS_WORD(ch);
217 case SRE_CATEGORY_UNI_LINEBREAK:
218 return SRE_UNI_IS_LINEBREAK(ch);
219 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
220 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000221#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000222 }
223 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000224}
225
226/* helpers */
227
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000228static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000229mark_fini(SRE_STATE* state)
230{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000231 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000232 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000233 state->mark_stack = NULL;
234 }
235 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000236}
237
238static int
239mark_save(SRE_STATE* state, int lo, int hi)
240{
241 void* stack;
242 int size;
243 int minsize, newsize;
244
245 if (hi <= lo)
246 return 0;
247
248 size = (hi - lo) + 1;
249
250 newsize = state->mark_stack_size;
251 minsize = state->mark_stack_base + size;
252
253 if (newsize < minsize) {
254 /* create new stack */
255 if (!newsize) {
256 newsize = 512;
257 if (newsize < minsize)
258 newsize = minsize;
259 TRACE(("allocate stack %d\n", newsize));
260 stack = malloc(sizeof(void*) * newsize);
261 } else {
262 /* grow the stack */
263 while (newsize < minsize)
264 newsize += newsize;
265 TRACE(("grow stack to %d\n", newsize));
266 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
267 }
268 if (!stack) {
269 mark_fini(state);
270 return SRE_ERROR_MEMORY;
271 }
272 state->mark_stack = stack;
273 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000274 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000275
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000276 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000277
278 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
279 size * sizeof(void*));
280
281 state->mark_stack_base += size;
282
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000283 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000284}
285
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000286static int
287mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000288{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000289 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000290
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000291 if (hi <= lo)
292 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000293
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000295
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000296 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000297
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000298 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000299
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000300 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
301 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000302
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000303 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000304}
305
/* generate 8-bit version */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

/* include this file into itself with SRE_RECURSIVE defined, so that
   only the character-type dependent part below the SRE_RECURSIVE guard
   is compiled with the 8-bit settings above */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
341#endif /* SRE_RECURSIVE */
342
/* -------------------------------------------------------------------- */
/* String matching engine */

/* the following section is compiled twice, with different character
   settings */
348
349LOCAL(int)
350SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
351{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000352 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000353
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000354 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000355
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000356 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000357
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000358 case SRE_AT_BEGINNING:
359 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000360
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000361 case SRE_AT_BEGINNING_LINE:
362 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000363 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000364
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000365 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000366 return (((void*) (ptr+1) == state->end &&
367 SRE_IS_LINEBREAK((int) ptr[0])) ||
368 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000369
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000370 case SRE_AT_END_LINE:
371 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000372 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000374 case SRE_AT_BOUNDARY:
375 if (state->beginning == state->end)
376 return 0;
377 that = ((void*) ptr > state->beginning) ?
378 SRE_IS_WORD((int) ptr[-1]) : 0;
379 this = ((void*) ptr < state->end) ?
380 SRE_IS_WORD((int) ptr[0]) : 0;
381 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000382
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000383 case SRE_AT_NON_BOUNDARY:
384 if (state->beginning == state->end)
385 return 0;
386 that = ((void*) ptr > state->beginning) ?
387 SRE_IS_WORD((int) ptr[-1]) : 0;
388 this = ((void*) ptr < state->end) ?
389 SRE_IS_WORD((int) ptr[0]) : 0;
390 return this == that;
391 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394}
395
396LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000397SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000398{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000400
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000402
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000403 for (;;) {
404 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000405
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000406 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000407 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000408 if (ch == set[0])
409 return ok;
410 set++;
411 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000414 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000415 if (set[0] <= ch && ch <= set[1])
416 return ok;
417 set += 2;
418 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000419
Fredrik Lundh3562f112000-07-02 12:00:07 +0000420 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000421 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000422 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
423 return ok;
424 set += 16;
425 break;
426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000427 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000428 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 if (sre_category(set[0], (int) ch))
430 return ok;
431 set += 1;
432 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000433
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000434 case SRE_OP_NEGATE:
435 ok = !ok;
436 break;
437
438 case SRE_OP_FAILURE:
439 return !ok;
440
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000441 default:
442 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000443 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 return 0;
445 }
446 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000447}
448
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000449LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
450
451LOCAL(int)
452SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
453{
454 SRE_CODE chr;
455 SRE_CHAR* ptr = state->ptr;
456 SRE_CHAR* end = state->end;
457 int i;
458
459 /* adjust end */
460 if (maxcount < end - ptr && maxcount != 65535)
461 end = ptr + maxcount;
462
463 switch (pattern[0]) {
464
465 case SRE_OP_ANY:
466 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000467 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000468 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
469 ptr++;
470 break;
471
472 case SRE_OP_ANY_ALL:
473 /* repeated dot wildcare. skip to the end of the target
474 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000475 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000476 ptr = end;
477 break;
478
479 case SRE_OP_LITERAL:
480 /* repeated literal */
481 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000482 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000483 while (ptr < end && (SRE_CODE) *ptr == chr)
484 ptr++;
485 break;
486
487 case SRE_OP_LITERAL_IGNORE:
488 /* repeated literal */
489 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000490 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000491 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
492 ptr++;
493 break;
494
495 case SRE_OP_NOT_LITERAL:
496 /* repeated non-literal */
497 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000498 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000499 while (ptr < end && (SRE_CODE) *ptr != chr)
500 ptr++;
501 break;
502
503 case SRE_OP_NOT_LITERAL_IGNORE:
504 /* repeated non-literal */
505 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000506 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000507 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
508 ptr++;
509 break;
510
511 case SRE_OP_IN:
512 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000513 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
514 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000515 ptr++;
516 break;
517
518 default:
519 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000520 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000521 while ((SRE_CHAR*) state->ptr < end) {
522 i = SRE_MATCH(state, pattern, level);
523 if (i < 0)
524 return i;
525 if (!i)
526 break;
527 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000528 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
529 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000530 return (SRE_CHAR*) state->ptr - ptr;
531 }
532
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000533 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000534 return ptr - (SRE_CHAR*) state->ptr;
535}
536
Guido van Rossumb700df92000-03-31 14:59:30 +0000537LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000538SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
539{
540 /* check if an SRE_OP_INFO block matches at the current position.
541 returns the number of SRE_CODE objects to skip if successful, 0
542 if no match */
543
544 SRE_CHAR* end = state->end;
545 SRE_CHAR* ptr = state->ptr;
546 int i;
547
548 /* check minimal length */
549 if (pattern[3] && (end - ptr) < pattern[3])
550 return 0;
551
552 /* check known prefix */
553 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
554 /* <length> <skip> <prefix data> <overlap data> */
555 for (i = 0; i < pattern[5]; i++)
556 if ((SRE_CODE) ptr[i] != pattern[7 + i])
557 return 0;
558 return pattern[0] + 2 * pattern[6];
559 }
560 return pattern[0];
561}
562
563LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000564SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000565{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000566 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000567 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000568
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000569 SRE_CHAR* end = state->end;
570 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000571 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000572 SRE_REPEAT* rp;
573 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000574 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000575
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000576 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000577
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000578 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000579
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000580#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000581 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000582 return SRE_ERROR_RECURSION_LIMIT;
583#endif
584
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000585#if defined(USE_RECURSION_LIMIT)
586 if (level > USE_RECURSION_LIMIT)
587 return SRE_ERROR_RECURSION_LIMIT;
588#endif
589
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000590 if (pattern[0] == SRE_OP_INFO) {
591 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000592 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000593 if (pattern[3] && (end - ptr) < pattern[3]) {
594 TRACE(("reject (got %d chars, need %d)\n",
595 (end - ptr), pattern[3]));
596 return 0;
597 }
598 pattern += pattern[1] + 1;
599 }
600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000601 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000602
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000603 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000605 case SRE_OP_FAILURE:
606 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000607 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000608 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000609
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000610 case SRE_OP_SUCCESS:
611 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000614 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000615
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000616 case SRE_OP_AT:
617 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000618 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000619 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000620 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000621 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000622 pattern++;
623 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000624
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000625 case SRE_OP_CATEGORY:
626 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000627 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000628 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000629 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000630 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000631 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000632 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000633 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000634
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000635 case SRE_OP_LITERAL:
636 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000637 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000638 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000639 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000640 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000641 pattern++;
642 ptr++;
643 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000644
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000645 case SRE_OP_NOT_LITERAL:
646 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000647 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000648 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000649 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000650 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000651 pattern++;
652 ptr++;
653 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000654
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000655 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000656 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000657 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000658 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000659 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
660 return 0;
661 ptr++;
662 break;
663
664 case SRE_OP_ANY_ALL:
665 /* match anything */
666 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000667 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000668 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000669 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000670 ptr++;
671 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000672
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000673 case SRE_OP_IN:
674 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000675 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000676 TRACE(("|%p|%p|IN\n", pattern, ptr));
677 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000678 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000679 pattern += pattern[0];
680 ptr++;
681 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000682
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000684 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000686 i = pattern[0];
687 {
688 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
689 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
690 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000691 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000692 while (p < e) {
693 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000694 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 p++; ptr++;
696 }
697 }
698 pattern++;
699 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000700
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000701 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000702 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000703 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000704 i = pattern[0];
705 {
706 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
707 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
708 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000709 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710 while (p < e) {
711 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000712 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000713 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000714 p++; ptr++;
715 }
716 }
717 pattern++;
718 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000719
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000720 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000721 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000722 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000723 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000724 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 pattern++;
726 ptr++;
727 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000728
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000730 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000731 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000732 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000733 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 pattern++;
735 ptr++;
736 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000737
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000739 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000741 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000742 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 pattern += pattern[0];
744 ptr++;
745 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000746
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000747 case SRE_OP_MARK:
748 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000749 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000750 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000751 i = pattern[0];
752 if (i & 1)
753 state->lastindex = i/2 + 1;
754 if (i > state->lastmark)
755 state->lastmark = i;
756 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 pattern++;
758 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000759
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000760 case SRE_OP_JUMP:
761 case SRE_OP_INFO:
762 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000763 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000764 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000765 pattern += pattern[0];
766 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000767
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 case SRE_OP_ASSERT:
769 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000770 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000771 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000772 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000773 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000774 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000775 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000776 if (i <= 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000777 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000778 if (pattern[1] > 0 && state->ptr != ptr)
779 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 pattern += pattern[0];
781 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000782
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000783 case SRE_OP_ASSERT_NOT:
784 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000785 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000786 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000787 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000788 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000789 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000790 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000791 if (i < 0)
792 return i;
793 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000794 return 0;
795 if (pattern[1] > 0 && state->ptr != ptr)
796 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000797 pattern += pattern[0];
798 break;
799
800 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000801 /* alternation */
802 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000803 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000804 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000805 for (; pattern[0]; pattern += pattern[0]) {
806 if (pattern[1] == SRE_OP_LITERAL &&
807 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
808 continue;
809 if (pattern[1] == SRE_OP_IN &&
810 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
811 continue;
812 state->ptr = ptr;
813 i = SRE_MATCH(state, pattern + 1, level + 1);
814 if (i)
815 return i;
816 if (state->lastmark > lastmark) {
817 memset(
818 state->mark + lastmark + 1, 0,
819 (state->lastmark - lastmark) * sizeof(void*)
820 );
821 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000822 }
823 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000824 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000825
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000826 case SRE_OP_REPEAT_ONE:
827 /* match repeated sequence (maximizing regexp) */
828
829 /* this operator only works if the repeated item is
830 exactly one character wide, and we're not already
831 collecting backtracking points. for other cases,
832 use the MAX_REPEAT operator instead */
833
834 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
835
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000836 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000837 pattern[1], pattern[2]));
838
Fredrik Lundhe1869832000-08-01 22:47:49 +0000839 if (ptr + pattern[1] > end)
840 return 0; /* cannot match */
841
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000842 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000843
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000844 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
845 if (count < 0)
846 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000847
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000848 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000849
850 /* when we arrive here, count contains the number of
851 matches, and ptr points to the tail of the target
852 string. check if the rest of the pattern matches,
853 and backtrack if not. */
854
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000855 if (count < (int) pattern[1])
856 return 0;
857
858 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
859 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000860 state->ptr = ptr;
861 return 1;
862
863 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
864 /* tail starts with a literal. skip positions where
865 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000866 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000867 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000868 while (count >= (int) pattern[1] &&
869 (ptr >= end || *ptr != chr)) {
870 ptr--;
871 count--;
872 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000873 if (count < (int) pattern[1])
874 break;
875 state->ptr = ptr;
876 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000877 if (i)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000878 return 1;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000879 ptr--;
880 count--;
881 }
882
883 } else {
884 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000885 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000886 while (count >= (int) pattern[1]) {
887 state->ptr = ptr;
888 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000889 if (i)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000890 return 1;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000891 ptr--;
892 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000893 if (state->lastmark > lastmark) {
894 memset(
895 state->mark + lastmark + 1, 0,
896 (state->lastmark - lastmark) * sizeof(void*)
897 );
898 state->lastmark = lastmark;
899 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000900 }
901 }
902 return 0;
903
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000904 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000905 /* create repeat context. all the hard work is done
906 by the UNTIL operator */
907 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000908 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000909 pattern[1], pattern[2]));
910
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000911 rep.count = -1;
912 rep.pattern = pattern;
913
914 /* install new repeat context */
915 rep.prev = state->repeat;
916 state->repeat = &rep;
917
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000918 state->ptr = ptr;
919 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000920
921 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000922
923 return i;
924
925 case SRE_OP_MAX_UNTIL:
926 /* maximizing repeat */
927 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
928
929 /* FIXME: we probably need to deal with zero-width
930 matches in here... */
931
932 rp = state->repeat;
933 if (!rp)
934 return SRE_ERROR_STATE;
935
936 state->ptr = ptr;
937
938 count = rp->count + 1;
939
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000940 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000941
942 if (count < rp->pattern[1]) {
943 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000944 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000945 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000946 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000947 if (i)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000948 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000949 rp->count = count - 1;
950 state->ptr = ptr;
951 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000952 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000953
954 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000955 /* we may have enough matches, but if we can
956 match another item, do so */
957 rp->count = count;
958 lastmark = state->lastmark;
959 mark_save(state, 0, lastmark);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000960 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000961 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000962 if (i)
963 return i;
964 mark_restore(state, 0, lastmark);
965 rp->count = count - 1;
966 state->ptr = ptr;
967 }
968
969 /* cannot match more repeated items here. make sure the
970 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000971 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000972 i = SRE_MATCH(state, pattern, level + 1);
973 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000974 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000975 state->repeat = rp;
976 return 0;
977
978 case SRE_OP_MIN_UNTIL:
979 /* minimizing repeat */
980 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
981
982 rp = state->repeat;
983 if (!rp)
984 return SRE_ERROR_STATE;
985
986 count = rp->count + 1;
987
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000988 TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000989
990 state->ptr = ptr;
991
992 if (count < rp->pattern[1]) {
993 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000994 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000995 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000996 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000997 if (i)
998 return i;
999 rp->count = count-1;
1000 state->ptr = ptr;
1001 return 0;
1002 }
1003
1004 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001005 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001006 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001007 if (i) {
1008 /* free(rp); */
1009 return i;
1010 }
1011 state->repeat = rp;
1012
1013 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1014 return 0;
1015
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001016 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001017 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001018 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001019 if (i)
1020 return i;
1021 rp->count = count - 1;
1022 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001023
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001024 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001025 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001026 return SRE_ERROR_ILLEGAL;
1027 }
1028 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001029
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001030 /* shouldn't end up here */
1031 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001032}
1033
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001034LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001035SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1036{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001037 SRE_CHAR* ptr = state->start;
1038 SRE_CHAR* end = state->end;
1039 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001040 int prefix_len = 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001041 int prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001042 SRE_CODE* prefix = NULL;
1043 SRE_CODE* charset = NULL;
1044 SRE_CODE* overlap = NULL;
1045 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001046
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001047 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001048 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001049 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001050
1051 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001052
1053 if (pattern[3] > 0) {
1054 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001055 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001056 end -= pattern[3]-1;
1057 if (end <= ptr)
1058 end = ptr+1;
1059 }
1060
Fredrik Lundh3562f112000-07-02 12:00:07 +00001061 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001062 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001063 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001064 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001065 prefix_skip = pattern[6];
1066 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001067 overlap = prefix + prefix_len - 1;
1068 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001069 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001070 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001071 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001072
1073 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001074 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001075
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001076 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1077 TRACE(("charset = %p\n", charset));
1078
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001079#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001080 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001081 /* pattern starts with a known prefix. use the overlap
1082 table to skip forward as fast as we possibly can */
1083 int i = 0;
1084 end = state->end;
1085 while (ptr < end) {
1086 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001087 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001088 if (!i)
1089 break;
1090 else
1091 i = overlap[i];
1092 } else {
1093 if (++i == prefix_len) {
1094 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001095 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1096 state->start = ptr + 1 - prefix_len;
1097 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001098 if (flags & SRE_INFO_LITERAL)
1099 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001100 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001101 if (status != 0)
1102 return status;
1103 /* close but no cigar -- try again */
1104 i = overlap[i];
1105 }
1106 break;
1107 }
1108
1109 }
1110 ptr++;
1111 }
1112 return 0;
1113 }
1114#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001115
Fredrik Lundh3562f112000-07-02 12:00:07 +00001116 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001117 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001118 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001119 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001120 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001121 for (;;) {
1122 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1123 ptr++;
1124 if (ptr == end)
1125 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001126 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001127 state->start = ptr;
1128 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001129 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001130 if (status != 0)
1131 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001132 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001133 } else if (charset) {
1134 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001135 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001136 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001137 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001138 ptr++;
1139 if (ptr == end)
1140 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001141 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001142 state->start = ptr;
1143 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001144 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001145 if (status != 0)
1146 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001147 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001148 }
1149 } else
1150 /* general case */
1151 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001152 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001153 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001154 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001155 if (status != 0)
1156 break;
1157 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001159 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001160}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001161
Guido van Rossumb700df92000-03-31 14:59:30 +00001162
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001163#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001164
1165/* -------------------------------------------------------------------- */
1166/* factories and destructors */
1167
1168/* see sre.h for object declarations */
1169
1170staticforward PyTypeObject Pattern_Type;
1171staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001172staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001173
1174static PyObject *
1175_compile(PyObject* self_, PyObject* args)
1176{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001177 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001179 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001180 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001182 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001183 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001184 PyObject* code;
1185 int groups = 0;
1186 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001187 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001188 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001189 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001190 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001191
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001192 code = PySequence_Fast(code, "code argument must be a sequence");
1193 if (!code)
1194 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001195
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001196#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001197 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001198#else
1199 n = PySequence_Length(code);
1200#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001201
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001202 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1203 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001204 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001205 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001206 }
1207
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001208 for (i = 0; i < n; i++) {
1209 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001210 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001211 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001212
1213 Py_DECREF(code);
1214
1215 if (PyErr_Occurred())
1216 return NULL;
1217
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001218 Py_INCREF(pattern);
1219 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001220
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001221 self->flags = flags;
1222
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001223 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001224
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001225 Py_XINCREF(groupindex);
1226 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001227
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001228 Py_XINCREF(indexgroup);
1229 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001230
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001231 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001232}
1233
1234static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001235sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001236{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001237 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001238}
1239
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001240static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001241sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001242{
1243 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001244 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001245 return NULL;
1246 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001247 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001248#if defined(HAVE_UNICODE)
1249 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001250 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001251#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001252 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001253}
1254
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001255LOCAL(void)
1256state_reset(SRE_STATE* state)
1257{
1258 int i;
1259
1260 state->lastmark = 0;
1261
1262 /* FIXME: dynamic! */
1263 for (i = 0; i < SRE_MARK_SIZE; i++)
1264 state->mark[i] = NULL;
1265
1266 state->lastindex = -1;
1267
1268 state->repeat = NULL;
1269
1270 mark_fini(state);
1271}
1272
Guido van Rossumb700df92000-03-31 14:59:30 +00001273LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001274state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1275 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001276{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001277 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001278
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001279 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001280 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001281 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001282
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001283 memset(state, 0, sizeof(SRE_STATE));
1284
1285 state->lastindex = -1;
1286
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001287 /* get pointer to string buffer */
1288 buffer = string->ob_type->tp_as_buffer;
1289 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1290 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001291 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001292 return NULL;
1293 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001294
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001295 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001296 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1297 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001298 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1299 return NULL;
1300 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001301
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001302 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001303
1304#if PY_VERSION_HEX >= 0x01060000
1305 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001306#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001307 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001308#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001309
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001310 if (PyString_Check(string) || bytes == size)
1311 state->charsize = 1;
1312#if defined(HAVE_UNICODE)
1313 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1314 state->charsize = sizeof(Py_UNICODE);
1315#endif
1316 else {
1317 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1318 return NULL;
1319 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001320
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001321 /* adjust boundaries */
1322 if (start < 0)
1323 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001324 else if (start > size)
1325 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001326
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001327 if (end < 0)
1328 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001329 else if (end > size)
1330 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001331
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001332 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001333
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001334 state->start = (void*) ((char*) ptr + start * state->charsize);
1335 state->end = (void*) ((char*) ptr + end * state->charsize);
1336
1337 Py_INCREF(string);
1338 state->string = string;
1339 state->pos = start;
1340 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001341
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001342 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001343 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001344#if defined(HAVE_UNICODE)
1345 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001346 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001347#endif
1348 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001349 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001350
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001351 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001352}
1353
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001354LOCAL(void)
1355state_fini(SRE_STATE* state)
1356{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001358 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001359}
1360
1361LOCAL(PyObject*)
1362state_getslice(SRE_STATE* state, int index, PyObject* string)
1363{
Fredrik Lundh58100642000-08-09 09:14:35 +00001364 int i, j;
1365
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001366 index = (index - 1) * 2;
1367
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001368 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001369 i = j = 0;
1370 } else {
1371 i = ((char*)state->mark[index] - (char*)state->beginning) /
1372 state->charsize;
1373 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1374 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001375 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001376
Fredrik Lundh58100642000-08-09 09:14:35 +00001377 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001378}
1379
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001380static void
1381pattern_error(int status)
1382{
1383 switch (status) {
1384 case SRE_ERROR_RECURSION_LIMIT:
1385 PyErr_SetString(
1386 PyExc_RuntimeError,
1387 "maximum recursion limit exceeded"
1388 );
1389 break;
1390 case SRE_ERROR_MEMORY:
1391 PyErr_NoMemory();
1392 break;
1393 default:
1394 /* other error codes indicate compiler/engine bugs */
1395 PyErr_SetString(
1396 PyExc_RuntimeError,
1397 "internal error in regular expression engine"
1398 );
1399 }
1400}
1401
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001402static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001403pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001404{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001405 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001407 MatchObject* match;
1408 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001409 char* base;
1410 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001412 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001413
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001414 /* create match object (with room for extra group marks) */
1415 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001416 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001417 if (!match)
1418 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001420 Py_INCREF(pattern);
1421 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001423 Py_INCREF(state->string);
1424 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001425
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001426 match->regs = NULL;
1427 match->groups = pattern->groups+1;
1428
1429 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001430
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001431 base = (char*) state->beginning;
1432 n = state->charsize;
1433
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001434 match->mark[0] = ((char*) state->start - base) / n;
1435 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001436
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001437 for (i = j = 0; i < pattern->groups; i++, j+=2)
1438 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1439 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1440 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1441 } else
1442 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1443
1444 match->pos = state->pos;
1445 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001446
Fredrik Lundh6f013982000-07-03 18:44:21 +00001447 match->lastindex = state->lastindex;
1448
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001449 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001450
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001451 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001452
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001453 /* no match */
1454 Py_INCREF(Py_None);
1455 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001456
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001457 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001458
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001459 /* internal error */
1460 pattern_error(status);
1461 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001462}
1463
1464static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001465pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001466{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001467 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001468
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001469 ScannerObject* self;
1470
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001471 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001472 int start = 0;
1473 int end = INT_MAX;
1474 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1475 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001476
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001477 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001478 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001479 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001480 return NULL;
1481
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001482 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001483 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001484 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001485 return NULL;
1486 }
1487
1488 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001489 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001490
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001491 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001492}
1493
Guido van Rossumb700df92000-03-31 14:59:30 +00001494static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001495pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001496{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001497 Py_XDECREF(self->pattern);
1498 Py_XDECREF(self->groupindex);
1499 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001500}
1501
1502static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001503pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001504{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001505 SRE_STATE state;
1506 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001507
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001508 PyObject* string;
1509 int start = 0;
1510 int end = INT_MAX;
1511 if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
1512 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001513
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001514 string = state_init(&state, self, string, start, end);
1515 if (!string)
1516 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001517
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001518 state.ptr = state.start;
1519
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001520 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1521
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001522 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001523 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001524 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001525#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001526 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001527#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001528 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001529
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001530 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1531
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001532 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001533
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001534 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001535}
1536
1537static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001538pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001539{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001540 SRE_STATE state;
1541 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001542
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001543 PyObject* string;
1544 int start = 0;
1545 int end = INT_MAX;
1546 if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
1547 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001548
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001549 string = state_init(&state, self, string, start, end);
1550 if (!string)
1551 return NULL;
1552
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001553 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1554
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001555 if (state.charsize == 1) {
1556 status = sre_search(&state, PatternObject_GetCode(self));
1557 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001558#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001559 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001560#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001561 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001562
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001563 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1564
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001565 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001566
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001567 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001568}
1569
1570static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001571call(char* function, PyObject* args)
1572{
1573 PyObject* name;
1574 PyObject* module;
1575 PyObject* func;
1576 PyObject* result;
1577
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001578 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001579 if (!name)
1580 return NULL;
1581 module = PyImport_Import(name);
1582 Py_DECREF(name);
1583 if (!module)
1584 return NULL;
1585 func = PyObject_GetAttrString(module, function);
1586 Py_DECREF(module);
1587 if (!func)
1588 return NULL;
1589 result = PyObject_CallObject(func, args);
1590 Py_DECREF(func);
1591 Py_DECREF(args);
1592 return result;
1593}
1594
1595static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001596pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001597{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001598 PyObject* template;
1599 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001600 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001601 if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
1602 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001603
1604 /* delegate to Python code */
1605 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1606}
1607
1608static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001609pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001610{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001611 PyObject* template;
1612 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001613 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001614 if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
1615 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001616
1617 /* delegate to Python code */
1618 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1619}
1620
1621static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001622pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001623{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001624 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001625 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001626 if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
1627 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001628
1629 /* delegate to Python code */
1630 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1631}
1632
1633static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001634pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001635{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001636 SRE_STATE state;
1637 PyObject* list;
1638 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001639 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001640
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001641 PyObject* string;
1642 int start = 0;
1643 int end = INT_MAX;
1644 if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
1645 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001646
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001647 string = state_init(&state, self, string, start, end);
1648 if (!string)
1649 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001650
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001651 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001652
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001653 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001654
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001655 PyObject* item;
1656
1657 state.ptr = state.start;
1658
1659 if (state.charsize == 1) {
1660 status = sre_search(&state, PatternObject_GetCode(self));
1661 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001662#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001663 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001664#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001665 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001666
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001667 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001668
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001669 /* don't bother to build a match object */
1670 switch (self->groups) {
1671 case 0:
1672 item = PySequence_GetSlice(
1673 string,
1674 ((char*) state.start - (char*) state.beginning) /
1675 state.charsize,
1676 ((char*) state.ptr - (char*) state.beginning) /
1677 state.charsize);
1678 if (!item)
1679 goto error;
1680 break;
1681 case 1:
1682 item = state_getslice(&state, 1, string);
1683 if (!item)
1684 goto error;
1685 break;
1686 default:
1687 item = PyTuple_New(self->groups);
1688 if (!item)
1689 goto error;
1690 for (i = 0; i < self->groups; i++) {
1691 PyObject* o = state_getslice(&state, i+1, string);
1692 if (!o) {
1693 Py_DECREF(item);
1694 goto error;
1695 }
1696 PyTuple_SET_ITEM(item, i, o);
1697 }
1698 break;
1699 }
1700
Barry Warsaw152fbe82000-08-18 05:09:50 +00001701 status = PyList_Append(list, item);
1702 Py_DECREF(item);
1703 if (status < 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001704 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001705
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001706 if (state.ptr == state.start)
1707 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001708 else
1709 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001710
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001711 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001712
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001713 if (status == 0)
1714 break;
1715
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001716 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001717 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001718
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001719 }
1720 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001721
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001722 state_fini(&state);
1723 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001724
1725error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001726 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001727 state_fini(&state);
1728 return NULL;
1729
Guido van Rossumb700df92000-03-31 14:59:30 +00001730}
1731
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001732static PyMethodDef pattern_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001733 {"match", (PyCFunction) pattern_match, 1},
1734 {"search", (PyCFunction) pattern_search, 1},
1735 {"sub", (PyCFunction) pattern_sub, 1},
1736 {"subn", (PyCFunction) pattern_subn, 1},
1737 {"split", (PyCFunction) pattern_split, 1},
1738 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001739 /* experimental */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001740 {"scanner", (PyCFunction) pattern_scanner, 1},
1741 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001742};
1743
1744static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001745pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001746{
1747 PyObject* res;
1748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001749 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001750
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001751 if (res)
1752 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001753
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001754 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001755
1756 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001757 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001758 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001759 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001760 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001761
1762 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001763 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001764
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001765 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001766 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001767
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001768 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001769 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001770 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001771 }
1772
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001773 PyErr_SetString(PyExc_AttributeError, name);
1774 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001775}
1776
1777statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001778 PyObject_HEAD_INIT(NULL)
1779 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001780 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001781 (destructor)pattern_dealloc, /*tp_dealloc*/
1782 0, /*tp_print*/
1783 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001784};
1785
1786/* -------------------------------------------------------------------- */
1787/* match methods */
1788
1789static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001790match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001791{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001792 Py_XDECREF(self->regs);
1793 Py_XDECREF(self->string);
1794 Py_DECREF(self->pattern);
1795 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001796}
1797
1798static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001799match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001800{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001801 if (index < 0 || index >= self->groups) {
1802 /* raise IndexError if we were given a bad group number */
1803 PyErr_SetString(
1804 PyExc_IndexError,
1805 "no such group"
1806 );
1807 return NULL;
1808 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001809
Fredrik Lundh6f013982000-07-03 18:44:21 +00001810 index *= 2;
1811
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001812 if (self->string == Py_None || self->mark[index] < 0) {
1813 /* return default value if the string or group is undefined */
1814 Py_INCREF(def);
1815 return def;
1816 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001817
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001818 return PySequence_GetSlice(
1819 self->string, self->mark[index], self->mark[index+1]
1820 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001821}
1822
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001823static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001824match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001825{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001826 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001827
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001828 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001829 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001830
Fredrik Lundh6f013982000-07-03 18:44:21 +00001831 i = -1;
1832
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001833 if (self->pattern->groupindex) {
1834 index = PyObject_GetItem(self->pattern->groupindex, index);
1835 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001836 if (PyInt_Check(index))
1837 i = (int) PyInt_AS_LONG(index);
1838 Py_DECREF(index);
1839 } else
1840 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001841 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001842
1843 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001844}
1845
1846static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001847match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001848{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001849 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001850}
1851
1852static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001853match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001854{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001855 PyObject* result;
1856 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001857
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001858 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001859
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001860 switch (size) {
1861 case 0:
1862 result = match_getslice(self, Py_False, Py_None);
1863 break;
1864 case 1:
1865 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1866 break;
1867 default:
1868 /* fetch multiple items */
1869 result = PyTuple_New(size);
1870 if (!result)
1871 return NULL;
1872 for (i = 0; i < size; i++) {
1873 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001874 self, PyTuple_GET_ITEM(args, i), Py_None
1875 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001876 if (!item) {
1877 Py_DECREF(result);
1878 return NULL;
1879 }
1880 PyTuple_SET_ITEM(result, i, item);
1881 }
1882 break;
1883 }
1884 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001885}
1886
1887static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001888match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001889{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001890 PyObject* result;
1891 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001892
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 PyObject* def = Py_None;
1894 if (!PyArg_ParseTuple(args, "|O:groups", &def))
1895 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001896
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001897 result = PyTuple_New(self->groups-1);
1898 if (!result)
1899 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001900
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001901 for (index = 1; index < self->groups; index++) {
1902 PyObject* item;
1903 item = match_getslice_by_index(self, index, def);
1904 if (!item) {
1905 Py_DECREF(result);
1906 return NULL;
1907 }
1908 PyTuple_SET_ITEM(result, index-1, item);
1909 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001910
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001911 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001912}
1913
1914static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001915match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001916{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001917 PyObject* result;
1918 PyObject* keys;
1919 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001920
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001921 PyObject* def = Py_None;
1922 if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
1923 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001924
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001925 result = PyDict_New();
1926 if (!result || !self->pattern->groupindex)
1927 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001928
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001929 keys = PyMapping_Keys(self->pattern->groupindex);
1930 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001931 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001932 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001933 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001934
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001935 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1936 PyObject* key;
1937 PyObject* item;
1938 key = PyList_GET_ITEM(keys, index);
1939 if (!key) {
1940 Py_DECREF(keys);
1941 Py_DECREF(result);
1942 return NULL;
1943 }
1944 item = match_getslice(self, key, def);
1945 if (!item) {
1946 Py_DECREF(key);
1947 Py_DECREF(keys);
1948 Py_DECREF(result);
1949 return NULL;
1950 }
1951 /* FIXME: <fl> this can fail, right? */
1952 PyDict_SetItem(result, key, item);
1953 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001954
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001955 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001956
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001957 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001958}
1959
1960static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001961match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001962{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001963 int index;
1964
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001965 PyObject* index_ = Py_False; /* zero */
1966 if (!PyArg_ParseTuple(args, "|O:start", &index_))
1967 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001968
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001969 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001970
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001971 if (index < 0 || index >= self->groups) {
1972 PyErr_SetString(
1973 PyExc_IndexError,
1974 "no such group"
1975 );
1976 return NULL;
1977 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001978
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001979 if (self->mark[index*2] < 0) {
1980 Py_INCREF(Py_None);
1981 return Py_None;
1982 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001983
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001984 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00001985}
1986
1987static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001988match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001989{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001990 int index;
1991
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001992 PyObject* index_ = Py_False; /* zero */
1993 if (!PyArg_ParseTuple(args, "|O:end", &index_))
1994 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001995
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001996 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001997
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001998 if (index < 0 || index >= self->groups) {
1999 PyErr_SetString(
2000 PyExc_IndexError,
2001 "no such group"
2002 );
2003 return NULL;
2004 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002005
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002006 if (self->mark[index*2] < 0) {
2007 Py_INCREF(Py_None);
2008 return Py_None;
2009 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002010
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002011 return Py_BuildValue("i", self->mark[index*2+1]);
2012}
2013
2014LOCAL(PyObject*)
2015_pair(int i1, int i2)
2016{
2017 PyObject* pair;
2018 PyObject* item;
2019
2020 pair = PyTuple_New(2);
2021 if (!pair)
2022 return NULL;
2023
2024 item = PyInt_FromLong(i1);
2025 if (!item)
2026 goto error;
2027 PyTuple_SET_ITEM(pair, 0, item);
2028
2029 item = PyInt_FromLong(i2);
2030 if (!item)
2031 goto error;
2032 PyTuple_SET_ITEM(pair, 1, item);
2033
2034 return pair;
2035
2036 error:
2037 Py_DECREF(pair);
2038 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002039}
2040
2041static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002042match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002043{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002044 int index;
2045
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002046 PyObject* index_ = Py_False; /* zero */
2047 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2048 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002049
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002050 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002051
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002052 if (index < 0 || index >= self->groups) {
2053 PyErr_SetString(
2054 PyExc_IndexError,
2055 "no such group"
2056 );
2057 return NULL;
2058 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002059
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 if (self->mark[index*2] < 0) {
2061 Py_INCREF(Py_None);
2062 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002063 return Py_BuildValue("OO", Py_None, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002065
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 return _pair(self->mark[index*2], self->mark[index*2+1]);
2067}
2068
2069static PyObject*
2070match_regs(MatchObject* self)
2071{
2072 PyObject* regs;
2073 PyObject* item;
2074 int index;
2075
2076 regs = PyTuple_New(self->groups);
2077 if (!regs)
2078 return NULL;
2079
2080 for (index = 0; index < self->groups; index++) {
2081 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2082 if (!item) {
2083 Py_DECREF(regs);
2084 return NULL;
2085 }
2086 PyTuple_SET_ITEM(regs, index, item);
2087 }
2088
2089 Py_INCREF(regs);
2090 self->regs = regs;
2091
2092 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002093}
2094
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002095static PyMethodDef match_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002096 {"group", (PyCFunction) match_group, 1},
2097 {"start", (PyCFunction) match_start, 1},
2098 {"end", (PyCFunction) match_end, 1},
2099 {"span", (PyCFunction) match_span, 1},
2100 {"groups", (PyCFunction) match_groups, 1},
2101 {"groupdict", (PyCFunction) match_groupdict, 1},
2102 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002103};
2104
2105static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002106match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002107{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002108 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002109
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002110 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2111 if (res)
2112 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002113
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002114 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002115
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002116 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002117 if (self->lastindex >= 0)
2118 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002119 Py_INCREF(Py_None);
2120 return Py_None;
2121 }
2122
2123 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002124 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002125 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002126 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002127 );
2128 if (result)
2129 return result;
2130 PyErr_Clear();
2131 }
2132 Py_INCREF(Py_None);
2133 return Py_None;
2134 }
2135
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 if (!strcmp(name, "string")) {
2137 if (self->string) {
2138 Py_INCREF(self->string);
2139 return self->string;
2140 } else {
2141 Py_INCREF(Py_None);
2142 return Py_None;
2143 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002144 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002145
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002146 if (!strcmp(name, "regs")) {
2147 if (self->regs) {
2148 Py_INCREF(self->regs);
2149 return self->regs;
2150 } else
2151 return match_regs(self);
2152 }
2153
2154 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002155 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002156 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002157 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002159 if (!strcmp(name, "pos"))
2160 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002161
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002162 if (!strcmp(name, "endpos"))
2163 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002164
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002165 PyErr_SetString(PyExc_AttributeError, name);
2166 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002167}
2168
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2171
2172statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002173 PyObject_HEAD_INIT(NULL)
2174 0, "SRE_Match",
2175 sizeof(MatchObject), sizeof(int),
2176 (destructor)match_dealloc, /*tp_dealloc*/
2177 0, /*tp_print*/
2178 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002179};
2180
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002181/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002182/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002183
2184static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002185scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002186{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002187 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002188 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002189 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002190}
2191
2192static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002193scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002194{
2195 SRE_STATE* state = &self->state;
2196 PyObject* match;
2197 int status;
2198
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002199 state_reset(state);
2200
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002201 state->ptr = state->start;
2202
2203 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002204 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002205 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002206#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002207 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002208#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002209 }
2210
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002211 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002212 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002213
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002214 if (status == 0 || state->ptr == state->start)
2215 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002216 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002217 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002218
2219 return match;
2220}
2221
2222
2223static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002224scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002225{
2226 SRE_STATE* state = &self->state;
2227 PyObject* match;
2228 int status;
2229
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002230 state_reset(state);
2231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002232 state->ptr = state->start;
2233
2234 if (state->charsize == 1) {
2235 status = sre_search(state, PatternObject_GetCode(self->pattern));
2236 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002237#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002238 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002239#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002240 }
2241
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002242 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002243 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002244
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002245 if (status == 0 || state->ptr == state->start)
2246 state->start = (void*) ((char*) state->ptr + state->charsize);
2247 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002248 state->start = state->ptr;
2249
2250 return match;
2251}
2252
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002253static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002254 {"match", (PyCFunction) scanner_match, 0},
2255 {"search", (PyCFunction) scanner_search, 0},
2256 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002257};
2258
2259static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002260scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002261{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002262 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002263
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002264 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2265 if (res)
2266 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002267
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002268 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002269
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002270 /* attributes */
2271 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002272 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002273 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002274 }
2275
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002276 PyErr_SetString(PyExc_AttributeError, name);
2277 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002278}
2279
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002280statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002281 PyObject_HEAD_INIT(NULL)
2282 0, "SRE_Scanner",
2283 sizeof(ScannerObject), 0,
2284 (destructor)scanner_dealloc, /*tp_dealloc*/
2285 0, /*tp_print*/
2286 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002287};
2288
Guido van Rossumb700df92000-03-31 14:59:30 +00002289static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002290 {"compile", _compile, 1},
2291 {"getcodesize", sre_codesize, 1},
2292 {"getlower", sre_getlower, 1},
2293 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002294};
2295
2296void
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002297#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002298__declspec(dllexport)
2299#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002300init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002301{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002302 /* Patch object types */
2303 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002304 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002305
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002306 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002307}
2308
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002309#endif /* !defined(SRE_RECURSIVE) */