blob: cf4982dd5606b15db06d563b9b283494761bdb13 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of (0.5)
 * 2000-06-30 fl added fast search optimization (0.9.3)
 * 2000-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
 * 2000-07-02 fl added charset optimizations, etc (0.9.5)
 * 2000-07-03 fl store code in pattern object, lookbehind, etc
 * 2000-07-08 fl added regs attribute
 * 2000-07-21 fl reset lastindex in scanner methods (0.9.6)
 * 2000-08-01 fl fixes for 1.6b1 (0.9.8)
 * 2000-08-03 fl added recursion limit
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-08-08 fl changed findall to return empty strings instead of None
 * 2000-08-27 fl properly propagate memory errors
 * 2000-09-02 fl return -1 instead of None for start/end/span
 * 2000-09-20 fl added expand method
 * 2000-09-21 fl don't use the buffer interface for unicode strings
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
34
35#ifndef SRE_RECURSIVE
36
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000037char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000038
39#include "Python.h"
40
41#include "sre.h"
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#if defined(HAVE_LIMITS_H)
44#include <limits.h>
45#else
46#define INT_MAX 2147483647
47#endif
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
Fredrik Lundh436c3d582000-06-29 08:58:44 +000051/* name of this module, minus the leading underscore */
52#define MODULE "sre"
53
Guido van Rossumb700df92000-03-31 14:59:30 +000054/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000055#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000056
Fredrik Lundh436c3d582000-06-29 08:58:44 +000057#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000058/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000059#define HAVE_UNICODE
60#endif
61
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
/* prevent run-away recursion (bad patterns on long strings) */

/* The fixed depth limit is only configured when USE_STACKCHECK has not
   been defined; otherwise USE_RECURSION_LIMIT is left undefined here. */
#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif
Fredrik Lundh96ab4652000-08-03 16:29:50 +000077
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000078/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000079#define USE_FAST_SEARCH
80
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000081/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000082#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000083
84/* -------------------------------------------------------------------- */
85
/* LOCAL(type) introduces a file-local function returning `type`.  The
   expansion depends on the compiler: MSVC gets __inline/__fastcall,
   USE_INLINE builds get `static inline`, everything else plain `static`. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
96
/* error codes (all negative; parenthesized so they expand safely inside
   larger expressions) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */
102
/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and expands
   to nothing otherwise.  Note the double parentheses at the call site, and
   that the arguments are not evaluated at all in non-VERBOSE builds. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
108
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000109/* -------------------------------------------------------------------- */
110/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000111
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000112/* default character predicates (run sre_chars.py to regenerate tables) */
113
/* bit flags stored in sre_char_info */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* classification flags for the character codes 0..127, 16 codes per row.
   25 = digit|alnum|word, 24 = alnum|word, 16 = word only ('_'),
   6 = space|linebreak ('\n'), 2 = space. */
static const char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
127
/* lower-case mapping for the character codes 0..127, 16 codes per row:
   'A'..'Z' (65..90) map to 'a'..'z' (97..122), every other code maps to
   itself. */
static const char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};
137
Fredrik Lundhb389df32000-06-29 12:48:37 +0000138static unsigned int sre_lower(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000139{
Fredrik Lundhb389df32000-06-29 12:48:37 +0000140 return ((ch) < 128 ? sre_char_lower[ch] : ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000141}
142
/* Table-driven character predicates.  For codes below 128 each macro
   yields the matching flag bit from sre_char_info (non-zero, but not
   necessarily 1); for all other codes it yields 0.
   NOTE: `ch` may be evaluated twice, so it must not have side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000153
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000154/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000155
/* Lower-case `ch` with the C library's tolower(), which follows the
   current locale.  Only codes below 256 are passed to tolower(); larger
   codes are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int) tolower((int) ch);
    return ch;
}
/* <ctype.h>-based predicates.  Codes of 256 and above never match (apart
   from the plain '\n' and '_' comparisons, which have no range check).
   NOTE: `ch` may be evaluated more than once. */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
165
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000166/* unicode-specific character predicates */
167
#if defined(HAVE_UNICODE)
/* Lower-case `ch` with Py_UNICODE_TOLOWER.  The code is cast to
   Py_UNICODE before the lookup. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}
/* predicates delegating to the Py_UNICODE_* macros; the word class is
   "alphanumeric or underscore" */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
179
Guido van Rossumb700df92000-03-31 14:59:30 +0000180LOCAL(int)
181sre_category(SRE_CODE category, unsigned int ch)
182{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000183 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000184
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000185 case SRE_CATEGORY_DIGIT:
186 return SRE_IS_DIGIT(ch);
187 case SRE_CATEGORY_NOT_DIGIT:
188 return !SRE_IS_DIGIT(ch);
189 case SRE_CATEGORY_SPACE:
190 return SRE_IS_SPACE(ch);
191 case SRE_CATEGORY_NOT_SPACE:
192 return !SRE_IS_SPACE(ch);
193 case SRE_CATEGORY_WORD:
194 return SRE_IS_WORD(ch);
195 case SRE_CATEGORY_NOT_WORD:
196 return !SRE_IS_WORD(ch);
197 case SRE_CATEGORY_LINEBREAK:
198 return SRE_IS_LINEBREAK(ch);
199 case SRE_CATEGORY_NOT_LINEBREAK:
200 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000201
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000202 case SRE_CATEGORY_LOC_WORD:
203 return SRE_LOC_IS_WORD(ch);
204 case SRE_CATEGORY_LOC_NOT_WORD:
205 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000206
207#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000208 case SRE_CATEGORY_UNI_DIGIT:
209 return SRE_UNI_IS_DIGIT(ch);
210 case SRE_CATEGORY_UNI_NOT_DIGIT:
211 return !SRE_UNI_IS_DIGIT(ch);
212 case SRE_CATEGORY_UNI_SPACE:
213 return SRE_UNI_IS_SPACE(ch);
214 case SRE_CATEGORY_UNI_NOT_SPACE:
215 return !SRE_UNI_IS_SPACE(ch);
216 case SRE_CATEGORY_UNI_WORD:
217 return SRE_UNI_IS_WORD(ch);
218 case SRE_CATEGORY_UNI_NOT_WORD:
219 return !SRE_UNI_IS_WORD(ch);
220 case SRE_CATEGORY_UNI_LINEBREAK:
221 return SRE_UNI_IS_LINEBREAK(ch);
222 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
223 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000224#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000225 }
226 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000227}
228
229/* helpers */
230
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000231static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000232mark_fini(SRE_STATE* state)
233{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000234 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000235 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000236 state->mark_stack = NULL;
237 }
238 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000239}
240
241static int
242mark_save(SRE_STATE* state, int lo, int hi)
243{
244 void* stack;
245 int size;
246 int minsize, newsize;
247
248 if (hi <= lo)
249 return 0;
250
251 size = (hi - lo) + 1;
252
253 newsize = state->mark_stack_size;
254 minsize = state->mark_stack_base + size;
255
256 if (newsize < minsize) {
257 /* create new stack */
258 if (!newsize) {
259 newsize = 512;
260 if (newsize < minsize)
261 newsize = minsize;
262 TRACE(("allocate stack %d\n", newsize));
263 stack = malloc(sizeof(void*) * newsize);
264 } else {
265 /* grow the stack */
266 while (newsize < minsize)
267 newsize += newsize;
268 TRACE(("grow stack to %d\n", newsize));
269 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
270 }
271 if (!stack) {
272 mark_fini(state);
273 return SRE_ERROR_MEMORY;
274 }
275 state->mark_stack = stack;
276 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000277 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000279 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000280
281 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
282 size * sizeof(void*));
283
284 state->mark_stack_base += size;
285
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000286 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000287}
288
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000289static int
290mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000291{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000292 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000293
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294 if (hi <= lo)
295 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000296
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000297 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000298
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000299 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000300
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000301 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000302
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000303 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
304 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000305
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000306 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000307}
308
/* generate 8-bit version */

/* The engine section further down is written against these macro names.
   They are first bound to the 8-bit identifiers. */
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

/* pull in this file again with SRE_RECURSIVE set, so that only the
   engine section is compiled, using the 8-bit bindings above */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

/* rebind the names; the engine section that follows in this pass is then
   compiled with the unicode bindings */
#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
344#endif /* SRE_RECURSIVE */
345
346/* -------------------------------------------------------------------- */
347/* String matching engine */
348
349/* the following section is compiled twice, with different character
350 settings */
351
352LOCAL(int)
353SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
354{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000355 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000357 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000358
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000359 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000360
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000361 case SRE_AT_BEGINNING:
362 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000363
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000364 case SRE_AT_BEGINNING_LINE:
365 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000366 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000367
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000368 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000369 return (((void*) (ptr+1) == state->end &&
370 SRE_IS_LINEBREAK((int) ptr[0])) ||
371 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000372
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000373 case SRE_AT_END_LINE:
374 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000375 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000376
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000377 case SRE_AT_BOUNDARY:
378 if (state->beginning == state->end)
379 return 0;
380 that = ((void*) ptr > state->beginning) ?
381 SRE_IS_WORD((int) ptr[-1]) : 0;
382 this = ((void*) ptr < state->end) ?
383 SRE_IS_WORD((int) ptr[0]) : 0;
384 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000385
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000386 case SRE_AT_NON_BOUNDARY:
387 if (state->beginning == state->end)
388 return 0;
389 that = ((void*) ptr > state->beginning) ?
390 SRE_IS_WORD((int) ptr[-1]) : 0;
391 this = ((void*) ptr < state->end) ?
392 SRE_IS_WORD((int) ptr[0]) : 0;
393 return this == that;
394 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000395
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000396 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000397}
398
399LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000400SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000401{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000403
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000405
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000406 for (;;) {
407 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000410 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 if (ch == set[0])
412 return ok;
413 set++;
414 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000415
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000416 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000417 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000418 if (set[0] <= ch && ch <= set[1])
419 return ok;
420 set += 2;
421 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000422
Fredrik Lundh3562f112000-07-02 12:00:07 +0000423 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000424 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000425 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
426 return ok;
427 set += 16;
428 break;
429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000431 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000432 if (sre_category(set[0], (int) ch))
433 return ok;
434 set += 1;
435 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000436
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000437 case SRE_OP_NEGATE:
438 ok = !ok;
439 break;
440
441 case SRE_OP_FAILURE:
442 return !ok;
443
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 default:
445 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000446 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000447 return 0;
448 }
449 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000450}
451
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000452LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
453
454LOCAL(int)
455SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
456{
457 SRE_CODE chr;
458 SRE_CHAR* ptr = state->ptr;
459 SRE_CHAR* end = state->end;
460 int i;
461
462 /* adjust end */
463 if (maxcount < end - ptr && maxcount != 65535)
464 end = ptr + maxcount;
465
466 switch (pattern[0]) {
467
468 case SRE_OP_ANY:
469 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000470 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000471 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
472 ptr++;
473 break;
474
475 case SRE_OP_ANY_ALL:
476 /* repeated dot wildcare. skip to the end of the target
477 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000478 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000479 ptr = end;
480 break;
481
482 case SRE_OP_LITERAL:
483 /* repeated literal */
484 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000485 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000486 while (ptr < end && (SRE_CODE) *ptr == chr)
487 ptr++;
488 break;
489
490 case SRE_OP_LITERAL_IGNORE:
491 /* repeated literal */
492 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000493 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000494 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
495 ptr++;
496 break;
497
498 case SRE_OP_NOT_LITERAL:
499 /* repeated non-literal */
500 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000501 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000502 while (ptr < end && (SRE_CODE) *ptr != chr)
503 ptr++;
504 break;
505
506 case SRE_OP_NOT_LITERAL_IGNORE:
507 /* repeated non-literal */
508 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000509 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000510 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
511 ptr++;
512 break;
513
514 case SRE_OP_IN:
515 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000516 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
517 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000518 ptr++;
519 break;
520
521 default:
522 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000523 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000524 while ((SRE_CHAR*) state->ptr < end) {
525 i = SRE_MATCH(state, pattern, level);
526 if (i < 0)
527 return i;
528 if (!i)
529 break;
530 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000531 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
532 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000533 return (SRE_CHAR*) state->ptr - ptr;
534 }
535
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000536 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000537 return ptr - (SRE_CHAR*) state->ptr;
538}
539
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix (only when the prefix flag is set and the
       prefix is longer than one character) */
    if ((pattern[2] & SRE_INFO_PREFIX) && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        /* NOTE(review): ptr[i] is read without comparing against end;
           presumably the minimal-length check above covers the prefix --
           confirm before enabling this code */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000567
568LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000569SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000570{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000571 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000572 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000573
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000574 SRE_CHAR* end = state->end;
575 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000576 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000577 SRE_REPEAT* rp;
578 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000579 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000580
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000581 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000582
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000583 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000584
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000585#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000586 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000587 return SRE_ERROR_RECURSION_LIMIT;
588#endif
589
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000590#if defined(USE_RECURSION_LIMIT)
591 if (level > USE_RECURSION_LIMIT)
592 return SRE_ERROR_RECURSION_LIMIT;
593#endif
594
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000595 if (pattern[0] == SRE_OP_INFO) {
596 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000597 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000598 if (pattern[3] && (end - ptr) < pattern[3]) {
599 TRACE(("reject (got %d chars, need %d)\n",
600 (end - ptr), pattern[3]));
601 return 0;
602 }
603 pattern += pattern[1] + 1;
604 }
605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000607
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000608 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000609
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000610 case SRE_OP_FAILURE:
611 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000613 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000614
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000615 case SRE_OP_SUCCESS:
616 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000617 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000618 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000619 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000620
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000621 case SRE_OP_AT:
622 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000623 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000624 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000625 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000626 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000627 pattern++;
628 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000629
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000630 case SRE_OP_CATEGORY:
631 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000632 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000633 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000634 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000635 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000636 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000637 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000638 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000639
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000640 case SRE_OP_LITERAL:
641 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000642 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000643 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000644 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000645 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000646 pattern++;
647 ptr++;
648 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000649
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000650 case SRE_OP_NOT_LITERAL:
651 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000652 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000653 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000655 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000656 pattern++;
657 ptr++;
658 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000659
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000660 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000661 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000662 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000663 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000664 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
665 return 0;
666 ptr++;
667 break;
668
669 case SRE_OP_ANY_ALL:
670 /* match anything */
671 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000672 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000673 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000674 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000675 ptr++;
676 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000677
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 case SRE_OP_IN:
679 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000680 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000681 TRACE(("|%p|%p|IN\n", pattern, ptr));
682 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000684 pattern += pattern[0];
685 ptr++;
686 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000687
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000688 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000690 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000691 i = pattern[0];
692 {
693 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
694 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
695 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000696 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000697 while (p < e) {
698 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000699 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000700 p++; ptr++;
701 }
702 }
703 pattern++;
704 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000705
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000706 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000707 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000708 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000709 i = pattern[0];
710 {
711 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
712 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
713 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000714 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000715 while (p < e) {
716 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000717 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000718 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000719 p++; ptr++;
720 }
721 }
722 pattern++;
723 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000724
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000726 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000728 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000729 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000730 pattern++;
731 ptr++;
732 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000733
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000735 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000737 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000738 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 pattern++;
740 ptr++;
741 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000742
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000744 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000746 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000747 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 pattern += pattern[0];
749 ptr++;
750 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 case SRE_OP_MARK:
753 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000754 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000755 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000756 i = pattern[0];
757 if (i & 1)
758 state->lastindex = i/2 + 1;
759 if (i > state->lastmark)
760 state->lastmark = i;
761 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 pattern++;
763 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000764
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000765 case SRE_OP_JUMP:
766 case SRE_OP_INFO:
767 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000768 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000769 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000770 pattern += pattern[0];
771 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000772
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000773 case SRE_OP_ASSERT:
774 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000775 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000776 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000777 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000778 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000779 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000780 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000781 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000782 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000783 if (pattern[1] > 0 && state->ptr != ptr)
784 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000785 pattern += pattern[0];
786 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000787
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000788 case SRE_OP_ASSERT_NOT:
789 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000790 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000791 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000793 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000794 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000795 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000796 if (i < 0)
797 return i;
798 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000799 return 0;
800 if (pattern[1] > 0 && state->ptr != ptr)
801 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000802 pattern += pattern[0];
803 break;
804
805 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000806 /* alternation */
807 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000808 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000809 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000810 for (; pattern[0]; pattern += pattern[0]) {
811 if (pattern[1] == SRE_OP_LITERAL &&
812 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
813 continue;
814 if (pattern[1] == SRE_OP_IN &&
815 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
816 continue;
817 state->ptr = ptr;
818 i = SRE_MATCH(state, pattern + 1, level + 1);
819 if (i)
820 return i;
821 if (state->lastmark > lastmark) {
822 memset(
823 state->mark + lastmark + 1, 0,
824 (state->lastmark - lastmark) * sizeof(void*)
825 );
826 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000827 }
828 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000829 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000830
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000831 case SRE_OP_REPEAT_ONE:
832 /* match repeated sequence (maximizing regexp) */
833
834 /* this operator only works if the repeated item is
835 exactly one character wide, and we're not already
836 collecting backtracking points. for other cases,
837 use the MAX_REPEAT operator instead */
838
839 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
840
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000841 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000842 pattern[1], pattern[2]));
843
Fredrik Lundhe1869832000-08-01 22:47:49 +0000844 if (ptr + pattern[1] > end)
845 return 0; /* cannot match */
846
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000847 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000848
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000849 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
850 if (count < 0)
851 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000852
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000853 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000854
855 /* when we arrive here, count contains the number of
856 matches, and ptr points to the tail of the target
857 string. check if the rest of the pattern matches,
858 and backtrack if not. */
859
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000860 if (count < (int) pattern[1])
861 return 0;
862
863 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
864 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000865 state->ptr = ptr;
866 return 1;
867
868 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
869 /* tail starts with a literal. skip positions where
870 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000871 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000872 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000873 while (count >= (int) pattern[1] &&
874 (ptr >= end || *ptr != chr)) {
875 ptr--;
876 count--;
877 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000878 if (count < (int) pattern[1])
879 break;
880 state->ptr = ptr;
881 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000882 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000883 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000884 ptr--;
885 count--;
886 }
887
888 } else {
889 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000890 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000891 while (count >= (int) pattern[1]) {
892 state->ptr = ptr;
893 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000894 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000895 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000896 ptr--;
897 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000898 if (state->lastmark > lastmark) {
899 memset(
900 state->mark + lastmark + 1, 0,
901 (state->lastmark - lastmark) * sizeof(void*)
902 );
903 state->lastmark = lastmark;
904 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000905 }
906 }
907 return 0;
908
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000909 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000910 /* create repeat context. all the hard work is done
911 by the UNTIL operator */
912 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000913 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000914 pattern[1], pattern[2]));
915
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000916 rep.count = -1;
917 rep.pattern = pattern;
918
919 /* install new repeat context */
920 rep.prev = state->repeat;
921 state->repeat = &rep;
922
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000923 state->ptr = ptr;
924 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000925
926 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000927
928 return i;
929
930 case SRE_OP_MAX_UNTIL:
931 /* maximizing repeat */
932 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
933
934 /* FIXME: we probably need to deal with zero-width
935 matches in here... */
936
937 rp = state->repeat;
938 if (!rp)
939 return SRE_ERROR_STATE;
940
941 state->ptr = ptr;
942
943 count = rp->count + 1;
944
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000945 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000946
947 if (count < rp->pattern[1]) {
948 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000949 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000950 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000951 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000952 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000953 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000954 rp->count = count - 1;
955 state->ptr = ptr;
956 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000957 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000958
959 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000960 /* we may have enough matches, but if we can
961 match another item, do so */
962 rp->count = count;
963 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000964 i = mark_save(state, 0, lastmark);
965 if (i < 0)
966 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000967 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000968 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000969 if (i)
970 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000971 i = mark_restore(state, 0, lastmark);
972 if (i < 0)
973 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000974 rp->count = count - 1;
975 state->ptr = ptr;
976 }
977
978 /* cannot match more repeated items here. make sure the
979 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000980 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000981 i = SRE_MATCH(state, pattern, level + 1);
982 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000983 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000984 state->repeat = rp;
985 return 0;
986
987 case SRE_OP_MIN_UNTIL:
988 /* minimizing repeat */
989 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
990
991 rp = state->repeat;
992 if (!rp)
993 return SRE_ERROR_STATE;
994
995 count = rp->count + 1;
996
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000997 TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000998
999 state->ptr = ptr;
1000
1001 if (count < rp->pattern[1]) {
1002 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001003 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001004 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001005 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001006 if (i)
1007 return i;
1008 rp->count = count-1;
1009 state->ptr = ptr;
1010 return 0;
1011 }
1012
1013 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001014 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001015 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001016 if (i) {
1017 /* free(rp); */
1018 return i;
1019 }
1020 state->repeat = rp;
1021
1022 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1023 return 0;
1024
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001025 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001026 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001027 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001028 if (i)
1029 return i;
1030 rp->count = count - 1;
1031 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001032
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001033 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001034 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001035 return SRE_ERROR_ILLEGAL;
1036 }
1037 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001038
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001039 /* shouldn't end up here */
1040 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001041}
1042
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001043LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001044SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1045{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001046 SRE_CHAR* ptr = state->start;
1047 SRE_CHAR* end = state->end;
1048 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001049 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001050 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001051 SRE_CODE* prefix = NULL;
1052 SRE_CODE* charset = NULL;
1053 SRE_CODE* overlap = NULL;
1054 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001055
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001056 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001057 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001058 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001059
1060 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001061
1062 if (pattern[3] > 0) {
1063 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001064 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001065 end -= pattern[3]-1;
1066 if (end <= ptr)
1067 end = ptr+1;
1068 }
1069
Fredrik Lundh3562f112000-07-02 12:00:07 +00001070 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001071 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001072 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001073 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001074 prefix_skip = pattern[6];
1075 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001076 overlap = prefix + prefix_len - 1;
1077 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001078 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001079 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001080 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001081
1082 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001083 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001084
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001085 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1086 TRACE(("charset = %p\n", charset));
1087
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001088#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001089 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001090 /* pattern starts with a known prefix. use the overlap
1091 table to skip forward as fast as we possibly can */
1092 int i = 0;
1093 end = state->end;
1094 while (ptr < end) {
1095 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001096 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001097 if (!i)
1098 break;
1099 else
1100 i = overlap[i];
1101 } else {
1102 if (++i == prefix_len) {
1103 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001104 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1105 state->start = ptr + 1 - prefix_len;
1106 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001107 if (flags & SRE_INFO_LITERAL)
1108 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001109 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001110 if (status != 0)
1111 return status;
1112 /* close but no cigar -- try again */
1113 i = overlap[i];
1114 }
1115 break;
1116 }
1117
1118 }
1119 ptr++;
1120 }
1121 return 0;
1122 }
1123#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001124
Fredrik Lundh3562f112000-07-02 12:00:07 +00001125 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001126 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001127 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001128 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001129 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001130 for (;;) {
1131 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1132 ptr++;
1133 if (ptr == end)
1134 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001135 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001136 state->start = ptr;
1137 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001138 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001139 if (status != 0)
1140 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001141 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001142 } else if (charset) {
1143 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001144 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001145 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001146 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001147 ptr++;
1148 if (ptr == end)
1149 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001150 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001151 state->start = ptr;
1152 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001153 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001154 if (status != 0)
1155 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001156 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001157 }
1158 } else
1159 /* general case */
1160 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001161 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001162 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001163 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001164 if (status != 0)
1165 break;
1166 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001167
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001168 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001169}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001170
Guido van Rossumb700df92000-03-31 14:59:30 +00001171
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001172#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001173
1174/* -------------------------------------------------------------------- */
1175/* factories and destructors */
1176
1177/* see sre.h for object declarations */
1178
1179staticforward PyTypeObject Pattern_Type;
1180staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001181staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001182
1183static PyObject *
1184_compile(PyObject* self_, PyObject* args)
1185{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001186 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001187
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001188 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001189 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001190
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001191 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001192 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001193 PyObject* code;
1194 int groups = 0;
1195 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001196 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001197 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001198 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001199 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001200
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001201 code = PySequence_Fast(code, "code argument must be a sequence");
1202 if (!code)
1203 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001204
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001205#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001206 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001207#else
1208 n = PySequence_Length(code);
1209#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001210
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001211 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1212 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001213 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001214 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001215 }
1216
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001217 for (i = 0; i < n; i++) {
1218 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001219 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001220 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001221
1222 Py_DECREF(code);
1223
1224 if (PyErr_Occurred())
1225 return NULL;
1226
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001227 Py_INCREF(pattern);
1228 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001229
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001230 self->flags = flags;
1231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001232 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001233
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001234 Py_XINCREF(groupindex);
1235 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001236
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001237 Py_XINCREF(indexgroup);
1238 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001239
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001240 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001241}
1242
1243static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001244sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001245{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001246 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001247}
1248
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001249static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001250sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001251{
1252 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001253 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001254 return NULL;
1255 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001256 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001257#if defined(HAVE_UNICODE)
1258 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001259 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001260#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001261 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001262}
1263
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001264LOCAL(void)
1265state_reset(SRE_STATE* state)
1266{
1267 int i;
1268
1269 state->lastmark = 0;
1270
1271 /* FIXME: dynamic! */
1272 for (i = 0; i < SRE_MARK_SIZE; i++)
1273 state->mark[i] = NULL;
1274
1275 state->lastindex = -1;
1276
1277 state->repeat = NULL;
1278
1279 mark_fini(state);
1280}
1281
Guido van Rossumb700df92000-03-31 14:59:30 +00001282LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001283state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1284 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001285{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001286 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001287
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001288 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001289 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001290 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001291
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001292 memset(state, 0, sizeof(SRE_STATE));
1293
1294 state->lastindex = -1;
1295
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001296#if defined(HAVE_UNICODE)
1297 if (PyUnicode_Check(string)) {
1298 /* unicode strings doesn't always support the buffer interface */
1299 ptr = (void*) PyUnicode_AS_DATA(string);
1300 bytes = PyUnicode_GET_DATA_SIZE(string);
1301 size = PyUnicode_GET_SIZE(string);
1302 state->charsize = sizeof(Py_UNICODE);
1303
1304 } else {
1305#endif
1306
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001307 /* get pointer to string buffer */
1308 buffer = string->ob_type->tp_as_buffer;
1309 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1310 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001311 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001312 return NULL;
1313 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001314
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001315 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001316 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1317 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001318 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1319 return NULL;
1320 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001321
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001322 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001323#if PY_VERSION_HEX >= 0x01060000
1324 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001325#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001326 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001327#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001328
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001329 if (PyString_Check(string) || bytes == size)
1330 state->charsize = 1;
1331#if defined(HAVE_UNICODE)
1332 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1333 state->charsize = sizeof(Py_UNICODE);
1334#endif
1335 else {
1336 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1337 return NULL;
1338 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001339
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001340#if defined(HAVE_UNICODE)
1341 }
1342#endif
1343
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001344 /* adjust boundaries */
1345 if (start < 0)
1346 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001347 else if (start > size)
1348 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001349
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001350 if (end < 0)
1351 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001352 else if (end > size)
1353 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001354
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001355 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 state->start = (void*) ((char*) ptr + start * state->charsize);
1358 state->end = (void*) ((char*) ptr + end * state->charsize);
1359
1360 Py_INCREF(string);
1361 state->string = string;
1362 state->pos = start;
1363 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001364
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001365 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001366 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001367#if defined(HAVE_UNICODE)
1368 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001369 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001370#endif
1371 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001372 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001374 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001375}
1376
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001377LOCAL(void)
1378state_fini(SRE_STATE* state)
1379{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001380 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001381 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001382}
1383
1384LOCAL(PyObject*)
1385state_getslice(SRE_STATE* state, int index, PyObject* string)
1386{
Fredrik Lundh58100642000-08-09 09:14:35 +00001387 int i, j;
1388
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001389 index = (index - 1) * 2;
1390
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001391 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001392 i = j = 0;
1393 } else {
1394 i = ((char*)state->mark[index] - (char*)state->beginning) /
1395 state->charsize;
1396 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1397 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001398 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001399
Fredrik Lundh58100642000-08-09 09:14:35 +00001400 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001401}
1402
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001403static void
1404pattern_error(int status)
1405{
1406 switch (status) {
1407 case SRE_ERROR_RECURSION_LIMIT:
1408 PyErr_SetString(
1409 PyExc_RuntimeError,
1410 "maximum recursion limit exceeded"
1411 );
1412 break;
1413 case SRE_ERROR_MEMORY:
1414 PyErr_NoMemory();
1415 break;
1416 default:
1417 /* other error codes indicate compiler/engine bugs */
1418 PyErr_SetString(
1419 PyExc_RuntimeError,
1420 "internal error in regular expression engine"
1421 );
1422 }
1423}
1424
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001425static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001426pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001427{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001428 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001430 MatchObject* match;
1431 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001432 char* base;
1433 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001434
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001435 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001436
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001437 /* create match object (with room for extra group marks) */
1438 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001439 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001440 if (!match)
1441 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001442
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001443 Py_INCREF(pattern);
1444 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001445
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001446 Py_INCREF(state->string);
1447 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001448
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001449 match->regs = NULL;
1450 match->groups = pattern->groups+1;
1451
1452 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001453
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001454 base = (char*) state->beginning;
1455 n = state->charsize;
1456
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001457 match->mark[0] = ((char*) state->start - base) / n;
1458 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001459
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001460 for (i = j = 0; i < pattern->groups; i++, j+=2)
1461 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1462 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1463 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1464 } else
1465 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1466
1467 match->pos = state->pos;
1468 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001469
Fredrik Lundh6f013982000-07-03 18:44:21 +00001470 match->lastindex = state->lastindex;
1471
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001472 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001473
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001474 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001475
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001476 /* no match */
1477 Py_INCREF(Py_None);
1478 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001479
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001480 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001481
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001482 /* internal error */
1483 pattern_error(status);
1484 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001485}
1486
1487static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001488pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001489{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001490 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001491
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001492 ScannerObject* self;
1493
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001494 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001495 int start = 0;
1496 int end = INT_MAX;
1497 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1498 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001499
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001500 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001501 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001502 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001503 return NULL;
1504
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001505 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001506 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001507 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001508 return NULL;
1509 }
1510
1511 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001512 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001513
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001514 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001515}
1516
Guido van Rossumb700df92000-03-31 14:59:30 +00001517static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001518pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001519{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001520 Py_XDECREF(self->pattern);
1521 Py_XDECREF(self->groupindex);
1522 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001523}
1524
1525static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001526pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001527{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001528 SRE_STATE state;
1529 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001530
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001531 PyObject* string;
1532 int start = 0;
1533 int end = INT_MAX;
1534 if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
1535 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001536
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001537 string = state_init(&state, self, string, start, end);
1538 if (!string)
1539 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001540
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001541 state.ptr = state.start;
1542
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001543 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1544
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001545 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001546 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001547 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001548#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001549 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001550#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001551 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001552
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001553 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1554
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001555 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001556
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001557 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001558}
1559
1560static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001561pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001562{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001563 SRE_STATE state;
1564 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001565
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001566 PyObject* string;
1567 int start = 0;
1568 int end = INT_MAX;
1569 if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
1570 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001571
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 string = state_init(&state, self, string, start, end);
1573 if (!string)
1574 return NULL;
1575
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001576 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1577
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001578 if (state.charsize == 1) {
1579 status = sre_search(&state, PatternObject_GetCode(self));
1580 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001581#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001582 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001583#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001584 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001585
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001586 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1587
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001588 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001589
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001590 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001591}
1592
1593static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001594call(char* function, PyObject* args)
1595{
1596 PyObject* name;
1597 PyObject* module;
1598 PyObject* func;
1599 PyObject* result;
1600
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001601 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001602 if (!name)
1603 return NULL;
1604 module = PyImport_Import(name);
1605 Py_DECREF(name);
1606 if (!module)
1607 return NULL;
1608 func = PyObject_GetAttrString(module, function);
1609 Py_DECREF(module);
1610 if (!func)
1611 return NULL;
1612 result = PyObject_CallObject(func, args);
1613 Py_DECREF(func);
1614 Py_DECREF(args);
1615 return result;
1616}
1617
1618static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001619pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001620{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001621 PyObject* template;
1622 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001623 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001624 if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
1625 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001626
1627 /* delegate to Python code */
1628 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1629}
1630
1631static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001632pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001633{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001634 PyObject* template;
1635 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001636 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001637 if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
1638 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001639
1640 /* delegate to Python code */
1641 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1642}
1643
1644static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001645pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001646{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001647 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001648 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001649 if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
1650 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001651
1652 /* delegate to Python code */
1653 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1654}
1655
1656static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001657pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001658{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001659 SRE_STATE state;
1660 PyObject* list;
1661 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001662 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001663
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001664 PyObject* string;
1665 int start = 0;
1666 int end = INT_MAX;
1667 if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
1668 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001669
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001670 string = state_init(&state, self, string, start, end);
1671 if (!string)
1672 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001673
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001674 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001676 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001677
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001678 PyObject* item;
1679
1680 state.ptr = state.start;
1681
1682 if (state.charsize == 1) {
1683 status = sre_search(&state, PatternObject_GetCode(self));
1684 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001685#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001686 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001687#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001688 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001689
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001690 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001691
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001692 /* don't bother to build a match object */
1693 switch (self->groups) {
1694 case 0:
1695 item = PySequence_GetSlice(
1696 string,
1697 ((char*) state.start - (char*) state.beginning) /
1698 state.charsize,
1699 ((char*) state.ptr - (char*) state.beginning) /
1700 state.charsize);
1701 if (!item)
1702 goto error;
1703 break;
1704 case 1:
1705 item = state_getslice(&state, 1, string);
1706 if (!item)
1707 goto error;
1708 break;
1709 default:
1710 item = PyTuple_New(self->groups);
1711 if (!item)
1712 goto error;
1713 for (i = 0; i < self->groups; i++) {
1714 PyObject* o = state_getslice(&state, i+1, string);
1715 if (!o) {
1716 Py_DECREF(item);
1717 goto error;
1718 }
1719 PyTuple_SET_ITEM(item, i, o);
1720 }
1721 break;
1722 }
1723
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001724 status = PyList_Append(list, item);
1725 Py_DECREF(item);
1726
1727 if (status < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001728 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001729
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001730 if (state.ptr == state.start)
1731 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001732 else
1733 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001734
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001735 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001736
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001737 if (status == 0)
1738 break;
1739
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001740 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001741 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001742
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001743 }
1744 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001745
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001746 state_fini(&state);
1747 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001748
1749error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001750 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001751 state_fini(&state);
1752 return NULL;
1753
Guido van Rossumb700df92000-03-31 14:59:30 +00001754}
1755
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001756static PyMethodDef pattern_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001757 {"match", (PyCFunction) pattern_match, 1},
1758 {"search", (PyCFunction) pattern_search, 1},
1759 {"sub", (PyCFunction) pattern_sub, 1},
1760 {"subn", (PyCFunction) pattern_subn, 1},
1761 {"split", (PyCFunction) pattern_split, 1},
1762 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001763 /* experimental */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001764 {"scanner", (PyCFunction) pattern_scanner, 1},
1765 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001766};
1767
1768static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001769pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001770{
1771 PyObject* res;
1772
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001773 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001774
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001775 if (res)
1776 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001777
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001778 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001779
1780 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001781 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001782 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001783 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001784 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001785
1786 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001787 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001788
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001789 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001790 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001791
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001792 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001793 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001794 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001795 }
1796
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001797 PyErr_SetString(PyExc_AttributeError, name);
1798 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001799}
1800
1801statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001802 PyObject_HEAD_INIT(NULL)
1803 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001804 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001805 (destructor)pattern_dealloc, /*tp_dealloc*/
1806 0, /*tp_print*/
1807 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001808};
1809
1810/* -------------------------------------------------------------------- */
1811/* match methods */
1812
1813static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001814match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001815{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001816 Py_XDECREF(self->regs);
1817 Py_XDECREF(self->string);
1818 Py_DECREF(self->pattern);
1819 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001820}
1821
1822static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001823match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001824{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001825 if (index < 0 || index >= self->groups) {
1826 /* raise IndexError if we were given a bad group number */
1827 PyErr_SetString(
1828 PyExc_IndexError,
1829 "no such group"
1830 );
1831 return NULL;
1832 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001833
Fredrik Lundh6f013982000-07-03 18:44:21 +00001834 index *= 2;
1835
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001836 if (self->string == Py_None || self->mark[index] < 0) {
1837 /* return default value if the string or group is undefined */
1838 Py_INCREF(def);
1839 return def;
1840 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001841
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001842 return PySequence_GetSlice(
1843 self->string, self->mark[index], self->mark[index+1]
1844 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001845}
1846
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001847static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001848match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001849{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001850 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001851
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001852 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001853 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001854
Fredrik Lundh6f013982000-07-03 18:44:21 +00001855 i = -1;
1856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001857 if (self->pattern->groupindex) {
1858 index = PyObject_GetItem(self->pattern->groupindex, index);
1859 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001860 if (PyInt_Check(index))
1861 i = (int) PyInt_AS_LONG(index);
1862 Py_DECREF(index);
1863 } else
1864 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001865 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001866
1867 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001868}
1869
1870static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001871match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001872{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001873 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001874}
1875
1876static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001877match_expand(MatchObject* self, PyObject* args)
1878{
1879 PyObject* template;
1880 if (!PyArg_ParseTuple(args, "O:expand", &template))
1881 return NULL;
1882
1883 /* delegate to Python code */
1884 return call(
1885 "_expand",
1886 Py_BuildValue("OOO", self->pattern, self, template)
1887 );
1888}
1889
1890static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001891match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001892{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 PyObject* result;
1894 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001895
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001896 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001897
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001898 switch (size) {
1899 case 0:
1900 result = match_getslice(self, Py_False, Py_None);
1901 break;
1902 case 1:
1903 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1904 break;
1905 default:
1906 /* fetch multiple items */
1907 result = PyTuple_New(size);
1908 if (!result)
1909 return NULL;
1910 for (i = 0; i < size; i++) {
1911 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001912 self, PyTuple_GET_ITEM(args, i), Py_None
1913 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001914 if (!item) {
1915 Py_DECREF(result);
1916 return NULL;
1917 }
1918 PyTuple_SET_ITEM(result, i, item);
1919 }
1920 break;
1921 }
1922 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001923}
1924
1925static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001926match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001927{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001928 PyObject* result;
1929 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001930
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001931 PyObject* def = Py_None;
1932 if (!PyArg_ParseTuple(args, "|O:groups", &def))
1933 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001934
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001935 result = PyTuple_New(self->groups-1);
1936 if (!result)
1937 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001938
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001939 for (index = 1; index < self->groups; index++) {
1940 PyObject* item;
1941 item = match_getslice_by_index(self, index, def);
1942 if (!item) {
1943 Py_DECREF(result);
1944 return NULL;
1945 }
1946 PyTuple_SET_ITEM(result, index-1, item);
1947 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001948
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001949 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001950}
1951
1952static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001953match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001954{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001955 PyObject* result;
1956 PyObject* keys;
1957 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001958
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001959 PyObject* def = Py_None;
1960 if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
1961 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001962
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001963 result = PyDict_New();
1964 if (!result || !self->pattern->groupindex)
1965 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001966
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001967 keys = PyMapping_Keys(self->pattern->groupindex);
1968 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001969 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001970 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001971 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001972
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001973 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1974 PyObject* key;
1975 PyObject* item;
1976 key = PyList_GET_ITEM(keys, index);
1977 if (!key) {
1978 Py_DECREF(keys);
1979 Py_DECREF(result);
1980 return NULL;
1981 }
1982 item = match_getslice(self, key, def);
1983 if (!item) {
1984 Py_DECREF(key);
1985 Py_DECREF(keys);
1986 Py_DECREF(result);
1987 return NULL;
1988 }
1989 /* FIXME: <fl> this can fail, right? */
1990 PyDict_SetItem(result, key, item);
1991 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001992
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001993 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001994
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001995 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001996}
1997
1998static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001999match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002000{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002001 int index;
2002
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002003 PyObject* index_ = Py_False; /* zero */
2004 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2005 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002006
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002007 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002008
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002009 if (index < 0 || index >= self->groups) {
2010 PyErr_SetString(
2011 PyExc_IndexError,
2012 "no such group"
2013 );
2014 return NULL;
2015 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002016
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002017 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002018 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002019}
2020
2021static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002022match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002023{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002024 int index;
2025
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002026 PyObject* index_ = Py_False; /* zero */
2027 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2028 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002029
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002030 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002031
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002032 if (index < 0 || index >= self->groups) {
2033 PyErr_SetString(
2034 PyExc_IndexError,
2035 "no such group"
2036 );
2037 return NULL;
2038 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002039
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002040 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002041 return Py_BuildValue("i", self->mark[index*2+1]);
2042}
2043
2044LOCAL(PyObject*)
2045_pair(int i1, int i2)
2046{
2047 PyObject* pair;
2048 PyObject* item;
2049
2050 pair = PyTuple_New(2);
2051 if (!pair)
2052 return NULL;
2053
2054 item = PyInt_FromLong(i1);
2055 if (!item)
2056 goto error;
2057 PyTuple_SET_ITEM(pair, 0, item);
2058
2059 item = PyInt_FromLong(i2);
2060 if (!item)
2061 goto error;
2062 PyTuple_SET_ITEM(pair, 1, item);
2063
2064 return pair;
2065
2066 error:
2067 Py_DECREF(pair);
2068 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002069}
2070
2071static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002072match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002073{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002074 int index;
2075
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002076 PyObject* index_ = Py_False; /* zero */
2077 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2078 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002079
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002080 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002081
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 if (index < 0 || index >= self->groups) {
2083 PyErr_SetString(
2084 PyExc_IndexError,
2085 "no such group"
2086 );
2087 return NULL;
2088 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002089
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002090 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002091 return _pair(self->mark[index*2], self->mark[index*2+1]);
2092}
2093
2094static PyObject*
2095match_regs(MatchObject* self)
2096{
2097 PyObject* regs;
2098 PyObject* item;
2099 int index;
2100
2101 regs = PyTuple_New(self->groups);
2102 if (!regs)
2103 return NULL;
2104
2105 for (index = 0; index < self->groups; index++) {
2106 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2107 if (!item) {
2108 Py_DECREF(regs);
2109 return NULL;
2110 }
2111 PyTuple_SET_ITEM(regs, index, item);
2112 }
2113
2114 Py_INCREF(regs);
2115 self->regs = regs;
2116
2117 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002118}
2119
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002120static PyMethodDef match_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 {"group", (PyCFunction) match_group, 1},
2122 {"start", (PyCFunction) match_start, 1},
2123 {"end", (PyCFunction) match_end, 1},
2124 {"span", (PyCFunction) match_span, 1},
2125 {"groups", (PyCFunction) match_groups, 1},
2126 {"groupdict", (PyCFunction) match_groupdict, 1},
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002127 {"expand", (PyCFunction) match_expand, 1},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002128 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002129};
2130
2131static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002132match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002133{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002134 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002135
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2137 if (res)
2138 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002139
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002140 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002141
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002142 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002143 if (self->lastindex >= 0)
2144 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002145 Py_INCREF(Py_None);
2146 return Py_None;
2147 }
2148
2149 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002150 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002151 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002152 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002153 );
2154 if (result)
2155 return result;
2156 PyErr_Clear();
2157 }
2158 Py_INCREF(Py_None);
2159 return Py_None;
2160 }
2161
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002162 if (!strcmp(name, "string")) {
2163 if (self->string) {
2164 Py_INCREF(self->string);
2165 return self->string;
2166 } else {
2167 Py_INCREF(Py_None);
2168 return Py_None;
2169 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002170 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002171
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002172 if (!strcmp(name, "regs")) {
2173 if (self->regs) {
2174 Py_INCREF(self->regs);
2175 return self->regs;
2176 } else
2177 return match_regs(self);
2178 }
2179
2180 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002181 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002182 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002183 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002184
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002185 if (!strcmp(name, "pos"))
2186 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002187
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002188 if (!strcmp(name, "endpos"))
2189 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002190
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002191 PyErr_SetString(PyExc_AttributeError, name);
2192 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002193}
2194
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2197
2198statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002199 PyObject_HEAD_INIT(NULL)
2200 0, "SRE_Match",
2201 sizeof(MatchObject), sizeof(int),
2202 (destructor)match_dealloc, /*tp_dealloc*/
2203 0, /*tp_print*/
2204 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002205};
2206
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002207/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002208/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002209
2210static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002211scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002212{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002213 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002214 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002215 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002216}
2217
2218static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002219scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002220{
2221 SRE_STATE* state = &self->state;
2222 PyObject* match;
2223 int status;
2224
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002225 state_reset(state);
2226
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002227 state->ptr = state->start;
2228
2229 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002230 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002231 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002232#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002233 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002234#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002235 }
2236
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002237 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002238 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002239
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002240 if (status == 0 || state->ptr == state->start)
2241 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002242 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002243 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002244
2245 return match;
2246}
2247
2248
2249static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002250scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002251{
2252 SRE_STATE* state = &self->state;
2253 PyObject* match;
2254 int status;
2255
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002256 state_reset(state);
2257
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002258 state->ptr = state->start;
2259
2260 if (state->charsize == 1) {
2261 status = sre_search(state, PatternObject_GetCode(self->pattern));
2262 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002263#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002264 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002265#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002266 }
2267
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002268 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002269 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002270
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002271 if (status == 0 || state->ptr == state->start)
2272 state->start = (void*) ((char*) state->ptr + state->charsize);
2273 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002274 state->start = state->ptr;
2275
2276 return match;
2277}
2278
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002279static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002280 {"match", (PyCFunction) scanner_match, 0},
2281 {"search", (PyCFunction) scanner_search, 0},
2282 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002283};
2284
2285static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002286scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002287{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002288 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002289
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002290 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2291 if (res)
2292 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002293
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002294 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002295
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002296 /* attributes */
2297 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002298 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002299 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002300 }
2301
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002302 PyErr_SetString(PyExc_AttributeError, name);
2303 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002304}
2305
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002306statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002307 PyObject_HEAD_INIT(NULL)
2308 0, "SRE_Scanner",
2309 sizeof(ScannerObject), 0,
2310 (destructor)scanner_dealloc, /*tp_dealloc*/
2311 0, /*tp_print*/
2312 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002313};
2314
Guido van Rossumb700df92000-03-31 14:59:30 +00002315static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002316 {"compile", _compile, 1},
2317 {"getcodesize", sre_codesize, 1},
2318 {"getlower", sre_getlower, 1},
2319 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002320};
2321
2322void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002323#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002324__declspec(dllexport)
2325#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002326init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002327{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002328 /* Patch object types */
2329 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002330 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002331
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002332 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002333}
2334
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002335#endif /* !defined(SRE_RECURSIVE) */