blob: 61ee694ad42b62abbf131d0f80f8a9a64fdd71c1 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00007 * 99-10-24 fl created (based on existing template matcher code)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00008 * 00-03-06 fl first alpha, sort of (0.5)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00009 * 00-06-30 fl added fast search optimization (0.9.3)
10 * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
11 * 00-07-02 fl added charset optimizations, etc (0.9.5)
12 * 00-07-03 fl store code in pattern object, lookbehind, etc
13 * 00-07-08 fl added regs attribute
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000014 * 00-07-21 fl reset lastindex in scanner methods (0.9.6)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000015 * 00-08-01 fl fixes for 1.6b1 (0.9.8)
Fredrik Lundh96ab4652000-08-03 16:29:50 +000016 * 00-08-03 fl added recursion limit
Fredrik Lundh7898c3e2000-08-07 20:59:04 +000017 * 00-08-07 fl use PyOS_CheckStack() if available
Fredrik Lundh58100642000-08-09 09:14:35 +000018 * 00-08-08 fl changed findall to return empty strings instead of None
Fredrik Lundh33accc12000-08-27 20:59:47 +000019 * 00-08-27 fl properly propagate memory errors
Guido van Rossumb700df92000-03-31 14:59:30 +000020 *
21 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
22 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000023 * This version of the SRE library can be redistributed under CNRI's
24 * Python 1.6 license. For any other use, please contact Secret Labs
25 * AB (info@pythonware.com).
26 *
Guido van Rossumb700df92000-03-31 14:59:30 +000027 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000028 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000029 * other compatibility work.
30 */
31
32#ifndef SRE_RECURSIVE
33
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000034char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000035
36#include "Python.h"
37
38#include "sre.h"
39
Guido van Rossumb700df92000-03-31 14:59:30 +000040#if defined(HAVE_LIMITS_H)
41#include <limits.h>
42#else
43#define INT_MAX 2147483647
44#endif
45
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000046#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000047
Fredrik Lundh436c3d582000-06-29 08:58:44 +000048/* name of this module, minus the leading underscore */
49#define MODULE "sre"
50
Guido van Rossumb700df92000-03-31 14:59:30 +000051/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000052#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000053
Fredrik Lundh436c3d582000-06-29 08:58:44 +000054#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000055/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056#define HAVE_UNICODE
57#endif
58
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000059/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000060/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000061
Fredrik Lundh33accc12000-08-27 20:59:47 +000062/* prevent run-away recursion (bad patterns on long strings) */
63
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000064#if !defined(USE_STACKCHECK)
Fredrik Lundh33accc12000-08-27 20:59:47 +000065#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
66/* require smaller recursion limit for a number of 64-bit platforms:
67 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
68/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
69#define USE_RECURSION_LIMIT 7500
70#else
71#define USE_RECURSION_LIMIT 10000
72#endif
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000073#endif
Fredrik Lundh96ab4652000-08-03 16:29:50 +000074
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000075/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000076#define USE_FAST_SEARCH
77
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000078/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000079#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080
81/* -------------------------------------------------------------------- */
82
Fredrik Lundh80946112000-06-29 18:03:25 +000083#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000084#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000085#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000086/* fastest possible local call under MSVC */
87#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000088#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000089#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000090#else
91#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000092#endif
93
94/* error codes */
95#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000096#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000097#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000098#define SRE_ERROR_MEMORY -9 /* out of memory */
99
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000100#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000101#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000102#else
103#define TRACE(v)
104#endif
105
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000106/* -------------------------------------------------------------------- */
107/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000108
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000109/* default character predicates (run sre_chars.py to regenerate tables) */
110
111#define SRE_DIGIT_MASK 1
112#define SRE_SPACE_MASK 2
113#define SRE_LINEBREAK_MASK 4
114#define SRE_ALNUM_MASK 8
115#define SRE_WORD_MASK 16
116
/* character class bits for the 128 codes handled by the default
   predicates, indexed by character code.  each entry is an OR of the
   SRE_*_MASK values above (run sre_chars.py to regenerate) */
static char sre_char_info[128] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /* 0x60 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
124
/* lower-case mapping for the 128 codes handled by the default
   predicates, indexed by character code.  'A'..'Z' (65..90) map to
   'a'..'z' (97..122); every other entry maps to itself */
static char sre_char_lower[128] = {
    /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /* 0x40 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
               93, 94, 95,
    /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
               124, 125, 126, 127
};
134
Fredrik Lundhb389df32000-06-29 12:48:37 +0000135static unsigned int sre_lower(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000136{
Fredrik Lundhb389df32000-06-29 12:48:37 +0000137 return ((ch) < 128 ? sre_char_lower[ch] : ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000138}
139
140#define SRE_IS_DIGIT(ch)\
141 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
142#define SRE_IS_SPACE(ch)\
143 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
144#define SRE_IS_LINEBREAK(ch)\
145 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
146#define SRE_IS_ALNUM(ch)\
147 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
148#define SRE_IS_WORD(ch)\
149 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000150
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000151/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000152
/* locale-aware lower-case conversion: codes below 256 are passed to
   the C library's tolower(), anything else is returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return tolower(ch);
}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000157#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
158#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
159#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
160#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
161#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
162
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000163/* unicode-specific character predicates */
164
165#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000166static unsigned int sre_lower_unicode(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000167{
168 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
169}
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000170#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
171#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
172#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
Fredrik Lundh22d25462000-07-01 17:50:59 +0000173#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000175#endif
176
Guido van Rossumb700df92000-03-31 14:59:30 +0000177LOCAL(int)
178sre_category(SRE_CODE category, unsigned int ch)
179{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000180 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_DIGIT:
183 return SRE_IS_DIGIT(ch);
184 case SRE_CATEGORY_NOT_DIGIT:
185 return !SRE_IS_DIGIT(ch);
186 case SRE_CATEGORY_SPACE:
187 return SRE_IS_SPACE(ch);
188 case SRE_CATEGORY_NOT_SPACE:
189 return !SRE_IS_SPACE(ch);
190 case SRE_CATEGORY_WORD:
191 return SRE_IS_WORD(ch);
192 case SRE_CATEGORY_NOT_WORD:
193 return !SRE_IS_WORD(ch);
194 case SRE_CATEGORY_LINEBREAK:
195 return SRE_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_NOT_LINEBREAK:
197 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000198
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000199 case SRE_CATEGORY_LOC_WORD:
200 return SRE_LOC_IS_WORD(ch);
201 case SRE_CATEGORY_LOC_NOT_WORD:
202 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000203
204#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000205 case SRE_CATEGORY_UNI_DIGIT:
206 return SRE_UNI_IS_DIGIT(ch);
207 case SRE_CATEGORY_UNI_NOT_DIGIT:
208 return !SRE_UNI_IS_DIGIT(ch);
209 case SRE_CATEGORY_UNI_SPACE:
210 return SRE_UNI_IS_SPACE(ch);
211 case SRE_CATEGORY_UNI_NOT_SPACE:
212 return !SRE_UNI_IS_SPACE(ch);
213 case SRE_CATEGORY_UNI_WORD:
214 return SRE_UNI_IS_WORD(ch);
215 case SRE_CATEGORY_UNI_NOT_WORD:
216 return !SRE_UNI_IS_WORD(ch);
217 case SRE_CATEGORY_UNI_LINEBREAK:
218 return SRE_UNI_IS_LINEBREAK(ch);
219 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
220 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000221#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000222 }
223 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000224}
225
226/* helpers */
227
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000228static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000229mark_fini(SRE_STATE* state)
230{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000231 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000232 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000233 state->mark_stack = NULL;
234 }
235 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000236}
237
238static int
239mark_save(SRE_STATE* state, int lo, int hi)
240{
241 void* stack;
242 int size;
243 int minsize, newsize;
244
245 if (hi <= lo)
246 return 0;
247
248 size = (hi - lo) + 1;
249
250 newsize = state->mark_stack_size;
251 minsize = state->mark_stack_base + size;
252
253 if (newsize < minsize) {
254 /* create new stack */
255 if (!newsize) {
256 newsize = 512;
257 if (newsize < minsize)
258 newsize = minsize;
259 TRACE(("allocate stack %d\n", newsize));
260 stack = malloc(sizeof(void*) * newsize);
261 } else {
262 /* grow the stack */
263 while (newsize < minsize)
264 newsize += newsize;
265 TRACE(("grow stack to %d\n", newsize));
266 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
267 }
268 if (!stack) {
269 mark_fini(state);
270 return SRE_ERROR_MEMORY;
271 }
272 state->mark_stack = stack;
273 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000274 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000275
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000276 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000277
278 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
279 size * sizeof(void*));
280
281 state->mark_stack_base += size;
282
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000283 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000284}
285
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000286static int
287mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000288{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000289 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000290
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000291 if (hi <= lo)
292 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000293
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000295
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000296 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000297
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000298 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000299
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000300 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
301 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000302
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000303 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000304}
305
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000306/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000307
308#define SRE_CHAR unsigned char
309#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000310#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000311#define SRE_CHARSET sre_charset
312#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000313#define SRE_MATCH sre_match
314#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000315
316#if defined(HAVE_UNICODE)
317
Guido van Rossumb700df92000-03-31 14:59:30 +0000318#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000319#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000320#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000321
Guido van Rossumb700df92000-03-31 14:59:30 +0000322#undef SRE_SEARCH
323#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000324#undef SRE_INFO
325#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000326#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000327#undef SRE_AT
328#undef SRE_CHAR
329
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000330/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000331
332#define SRE_CHAR Py_UNICODE
333#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000334#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000335#define SRE_CHARSET sre_ucharset
336#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000337#define SRE_MATCH sre_umatch
338#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000339#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
341#endif /* SRE_RECURSIVE */
342
343/* -------------------------------------------------------------------- */
344/* String matching engine */
345
346/* the following section is compiled twice, with different character
347 settings */
348
349LOCAL(int)
350SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
351{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000352 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000353
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000354 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000355
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000356 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000357
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000358 case SRE_AT_BEGINNING:
359 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000360
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000361 case SRE_AT_BEGINNING_LINE:
362 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000363 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000364
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000365 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000366 return (((void*) (ptr+1) == state->end &&
367 SRE_IS_LINEBREAK((int) ptr[0])) ||
368 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000369
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000370 case SRE_AT_END_LINE:
371 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000372 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000374 case SRE_AT_BOUNDARY:
375 if (state->beginning == state->end)
376 return 0;
377 that = ((void*) ptr > state->beginning) ?
378 SRE_IS_WORD((int) ptr[-1]) : 0;
379 this = ((void*) ptr < state->end) ?
380 SRE_IS_WORD((int) ptr[0]) : 0;
381 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000382
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000383 case SRE_AT_NON_BOUNDARY:
384 if (state->beginning == state->end)
385 return 0;
386 that = ((void*) ptr > state->beginning) ?
387 SRE_IS_WORD((int) ptr[-1]) : 0;
388 this = ((void*) ptr < state->end) ?
389 SRE_IS_WORD((int) ptr[0]) : 0;
390 return this == that;
391 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394}
395
396LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000397SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000398{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000400
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000402
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000403 for (;;) {
404 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000405
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000406 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000407 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000408 if (ch == set[0])
409 return ok;
410 set++;
411 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000414 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000415 if (set[0] <= ch && ch <= set[1])
416 return ok;
417 set += 2;
418 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000419
Fredrik Lundh3562f112000-07-02 12:00:07 +0000420 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000421 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000422 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
423 return ok;
424 set += 16;
425 break;
426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000427 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000428 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 if (sre_category(set[0], (int) ch))
430 return ok;
431 set += 1;
432 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000433
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000434 case SRE_OP_NEGATE:
435 ok = !ok;
436 break;
437
438 case SRE_OP_FAILURE:
439 return !ok;
440
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000441 default:
442 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000443 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 return 0;
445 }
446 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000447}
448
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000449LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
450
451LOCAL(int)
452SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
453{
454 SRE_CODE chr;
455 SRE_CHAR* ptr = state->ptr;
456 SRE_CHAR* end = state->end;
457 int i;
458
459 /* adjust end */
460 if (maxcount < end - ptr && maxcount != 65535)
461 end = ptr + maxcount;
462
463 switch (pattern[0]) {
464
465 case SRE_OP_ANY:
466 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000467 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000468 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
469 ptr++;
470 break;
471
472 case SRE_OP_ANY_ALL:
473 /* repeated dot wildcare. skip to the end of the target
474 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000475 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000476 ptr = end;
477 break;
478
479 case SRE_OP_LITERAL:
480 /* repeated literal */
481 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000482 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000483 while (ptr < end && (SRE_CODE) *ptr == chr)
484 ptr++;
485 break;
486
487 case SRE_OP_LITERAL_IGNORE:
488 /* repeated literal */
489 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000490 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000491 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
492 ptr++;
493 break;
494
495 case SRE_OP_NOT_LITERAL:
496 /* repeated non-literal */
497 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000498 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000499 while (ptr < end && (SRE_CODE) *ptr != chr)
500 ptr++;
501 break;
502
503 case SRE_OP_NOT_LITERAL_IGNORE:
504 /* repeated non-literal */
505 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000506 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000507 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
508 ptr++;
509 break;
510
511 case SRE_OP_IN:
512 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000513 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
514 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000515 ptr++;
516 break;
517
518 default:
519 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000520 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000521 while ((SRE_CHAR*) state->ptr < end) {
522 i = SRE_MATCH(state, pattern, level);
523 if (i < 0)
524 return i;
525 if (!i)
526 break;
527 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000528 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
529 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000530 return (SRE_CHAR*) state->ptr - ptr;
531 }
532
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000533 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000534 return ptr - (SRE_CHAR*) state->ptr;
535}
536
Fredrik Lundh33accc12000-08-27 20:59:47 +0000537#if 0 /* not used in this release */
Guido van Rossumb700df92000-03-31 14:59:30 +0000538LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000539SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
540{
541 /* check if an SRE_OP_INFO block matches at the current position.
542 returns the number of SRE_CODE objects to skip if successful, 0
543 if no match */
544
545 SRE_CHAR* end = state->end;
546 SRE_CHAR* ptr = state->ptr;
547 int i;
548
549 /* check minimal length */
550 if (pattern[3] && (end - ptr) < pattern[3])
551 return 0;
552
553 /* check known prefix */
554 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
555 /* <length> <skip> <prefix data> <overlap data> */
556 for (i = 0; i < pattern[5]; i++)
557 if ((SRE_CODE) ptr[i] != pattern[7 + i])
558 return 0;
559 return pattern[0] + 2 * pattern[6];
560 }
561 return pattern[0];
562}
Fredrik Lundh33accc12000-08-27 20:59:47 +0000563#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000564
565LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000566SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000567{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000568 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000569 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000570
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000571 SRE_CHAR* end = state->end;
572 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000573 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000574 SRE_REPEAT* rp;
575 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000576 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000577
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000578 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000579
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000581
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000582#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000583 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000584 return SRE_ERROR_RECURSION_LIMIT;
585#endif
586
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000587#if defined(USE_RECURSION_LIMIT)
588 if (level > USE_RECURSION_LIMIT)
589 return SRE_ERROR_RECURSION_LIMIT;
590#endif
591
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000592 if (pattern[0] == SRE_OP_INFO) {
593 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000594 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000595 if (pattern[3] && (end - ptr) < pattern[3]) {
596 TRACE(("reject (got %d chars, need %d)\n",
597 (end - ptr), pattern[3]));
598 return 0;
599 }
600 pattern += pattern[1] + 1;
601 }
602
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000603 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000605 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000606
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000607 case SRE_OP_FAILURE:
608 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000609 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000610 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000611
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000612 case SRE_OP_SUCCESS:
613 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000614 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000615 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000616 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000617
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000618 case SRE_OP_AT:
619 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000620 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000621 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000622 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000623 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000624 pattern++;
625 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000626
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000627 case SRE_OP_CATEGORY:
628 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000629 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000630 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000631 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000632 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000633 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000634 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000635 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000636
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000637 case SRE_OP_LITERAL:
638 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000639 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000640 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000641 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000642 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000643 pattern++;
644 ptr++;
645 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000646
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000647 case SRE_OP_NOT_LITERAL:
648 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000649 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000650 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000651 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000652 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000653 pattern++;
654 ptr++;
655 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000656
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000657 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000658 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000659 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000660 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000661 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
662 return 0;
663 ptr++;
664 break;
665
666 case SRE_OP_ANY_ALL:
667 /* match anything */
668 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000669 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000670 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000671 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000672 ptr++;
673 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000674
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000675 case SRE_OP_IN:
676 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000677 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000678 TRACE(("|%p|%p|IN\n", pattern, ptr));
679 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000680 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000681 pattern += pattern[0];
682 ptr++;
683 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000685 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000686 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000687 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000688 i = pattern[0];
689 {
690 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
691 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
692 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000693 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000694 while (p < e) {
695 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000696 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000697 p++; ptr++;
698 }
699 }
700 pattern++;
701 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000702
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000703 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000704 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000705 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000706 i = pattern[0];
707 {
708 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
709 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
710 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000711 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000712 while (p < e) {
713 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000714 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000715 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000716 p++; ptr++;
717 }
718 }
719 pattern++;
720 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000721
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000722 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000723 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000725 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000726 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 pattern++;
728 ptr++;
729 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000730
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000731 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000732 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000733 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000734 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000735 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 pattern++;
737 ptr++;
738 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000739
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000741 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000743 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000744 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 pattern += pattern[0];
746 ptr++;
747 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000749 case SRE_OP_MARK:
750 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000751 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000752 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000753 i = pattern[0];
754 if (i & 1)
755 state->lastindex = i/2 + 1;
756 if (i > state->lastmark)
757 state->lastmark = i;
758 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 pattern++;
760 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000761
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 case SRE_OP_JUMP:
763 case SRE_OP_INFO:
764 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000765 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000766 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 pattern += pattern[0];
768 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000769
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000770 case SRE_OP_ASSERT:
771 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000772 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000773 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000774 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000775 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000776 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000777 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000778 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000779 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000780 if (pattern[1] > 0 && state->ptr != ptr)
781 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000782 pattern += pattern[0];
783 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000784
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000785 case SRE_OP_ASSERT_NOT:
786 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000787 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000788 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000789 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000790 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000791 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000792 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000793 if (i < 0)
794 return i;
795 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000796 return 0;
797 if (pattern[1] > 0 && state->ptr != ptr)
798 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000799 pattern += pattern[0];
800 break;
801
802 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000803 /* alternation */
804 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000805 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000806 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000807 for (; pattern[0]; pattern += pattern[0]) {
808 if (pattern[1] == SRE_OP_LITERAL &&
809 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
810 continue;
811 if (pattern[1] == SRE_OP_IN &&
812 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
813 continue;
814 state->ptr = ptr;
815 i = SRE_MATCH(state, pattern + 1, level + 1);
816 if (i)
817 return i;
818 if (state->lastmark > lastmark) {
819 memset(
820 state->mark + lastmark + 1, 0,
821 (state->lastmark - lastmark) * sizeof(void*)
822 );
823 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000824 }
825 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000826 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000827
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000828 case SRE_OP_REPEAT_ONE:
829 /* match repeated sequence (maximizing regexp) */
830
831 /* this operator only works if the repeated item is
832 exactly one character wide, and we're not already
833 collecting backtracking points. for other cases,
834 use the MAX_REPEAT operator instead */
835
836 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
837
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000838 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000839 pattern[1], pattern[2]));
840
Fredrik Lundhe1869832000-08-01 22:47:49 +0000841 if (ptr + pattern[1] > end)
842 return 0; /* cannot match */
843
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000844 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000845
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000846 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
847 if (count < 0)
848 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000849
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000850 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000851
852 /* when we arrive here, count contains the number of
853 matches, and ptr points to the tail of the target
854 string. check if the rest of the pattern matches,
855 and backtrack if not. */
856
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000857 if (count < (int) pattern[1])
858 return 0;
859
860 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
861 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000862 state->ptr = ptr;
863 return 1;
864
865 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
866 /* tail starts with a literal. skip positions where
867 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000868 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000869 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000870 while (count >= (int) pattern[1] &&
871 (ptr >= end || *ptr != chr)) {
872 ptr--;
873 count--;
874 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000875 if (count < (int) pattern[1])
876 break;
877 state->ptr = ptr;
878 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000879 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000880 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000881 ptr--;
882 count--;
883 }
884
885 } else {
886 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000887 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000888 while (count >= (int) pattern[1]) {
889 state->ptr = ptr;
890 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000891 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000892 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000893 ptr--;
894 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000895 if (state->lastmark > lastmark) {
896 memset(
897 state->mark + lastmark + 1, 0,
898 (state->lastmark - lastmark) * sizeof(void*)
899 );
900 state->lastmark = lastmark;
901 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000902 }
903 }
904 return 0;
905
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000906 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000907 /* create repeat context. all the hard work is done
908 by the UNTIL operator */
909 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000910 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000911 pattern[1], pattern[2]));
912
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000913 rep.count = -1;
914 rep.pattern = pattern;
915
916 /* install new repeat context */
917 rep.prev = state->repeat;
918 state->repeat = &rep;
919
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000920 state->ptr = ptr;
921 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000922
923 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000924
925 return i;
926
927 case SRE_OP_MAX_UNTIL:
928 /* maximizing repeat */
929 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
930
931 /* FIXME: we probably need to deal with zero-width
932 matches in here... */
933
934 rp = state->repeat;
935 if (!rp)
936 return SRE_ERROR_STATE;
937
938 state->ptr = ptr;
939
940 count = rp->count + 1;
941
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000942 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000943
944 if (count < rp->pattern[1]) {
945 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000946 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000947 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000948 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000949 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000950 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000951 rp->count = count - 1;
952 state->ptr = ptr;
953 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000954 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000955
956 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000957 /* we may have enough matches, but if we can
958 match another item, do so */
959 rp->count = count;
960 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000961 i = mark_save(state, 0, lastmark);
962 if (i < 0)
963 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000964 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000965 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000966 if (i)
967 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000968 i = mark_restore(state, 0, lastmark);
969 if (i < 0)
970 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000971 rp->count = count - 1;
972 state->ptr = ptr;
973 }
974
975 /* cannot match more repeated items here. make sure the
976 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000977 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000978 i = SRE_MATCH(state, pattern, level + 1);
979 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000980 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000981 state->repeat = rp;
982 return 0;
983
984 case SRE_OP_MIN_UNTIL:
985 /* minimizing repeat */
986 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
987
988 rp = state->repeat;
989 if (!rp)
990 return SRE_ERROR_STATE;
991
992 count = rp->count + 1;
993
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000994 TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000995
996 state->ptr = ptr;
997
998 if (count < rp->pattern[1]) {
999 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001000 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001001 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001002 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001003 if (i)
1004 return i;
1005 rp->count = count-1;
1006 state->ptr = ptr;
1007 return 0;
1008 }
1009
1010 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001011 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001012 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001013 if (i) {
1014 /* free(rp); */
1015 return i;
1016 }
1017 state->repeat = rp;
1018
1019 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1020 return 0;
1021
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001022 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001023 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001024 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001025 if (i)
1026 return i;
1027 rp->count = count - 1;
1028 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001029
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001030 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001031 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001032 return SRE_ERROR_ILLEGAL;
1033 }
1034 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001035
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001036 /* shouldn't end up here */
1037 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001038}
1039
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001040LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001041SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1042{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001043 SRE_CHAR* ptr = state->start;
1044 SRE_CHAR* end = state->end;
1045 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001046 int prefix_len = 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001047 int prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001048 SRE_CODE* prefix = NULL;
1049 SRE_CODE* charset = NULL;
1050 SRE_CODE* overlap = NULL;
1051 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001052
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001053 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001054 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001055 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001056
1057 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001058
1059 if (pattern[3] > 0) {
1060 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001061 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001062 end -= pattern[3]-1;
1063 if (end <= ptr)
1064 end = ptr+1;
1065 }
1066
Fredrik Lundh3562f112000-07-02 12:00:07 +00001067 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001068 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001069 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001070 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001071 prefix_skip = pattern[6];
1072 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001073 overlap = prefix + prefix_len - 1;
1074 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001075 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001076 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001077 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001078
1079 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001080 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001081
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001082 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1083 TRACE(("charset = %p\n", charset));
1084
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001085#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001086 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001087 /* pattern starts with a known prefix. use the overlap
1088 table to skip forward as fast as we possibly can */
1089 int i = 0;
1090 end = state->end;
1091 while (ptr < end) {
1092 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001093 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001094 if (!i)
1095 break;
1096 else
1097 i = overlap[i];
1098 } else {
1099 if (++i == prefix_len) {
1100 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001101 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1102 state->start = ptr + 1 - prefix_len;
1103 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001104 if (flags & SRE_INFO_LITERAL)
1105 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001106 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001107 if (status != 0)
1108 return status;
1109 /* close but no cigar -- try again */
1110 i = overlap[i];
1111 }
1112 break;
1113 }
1114
1115 }
1116 ptr++;
1117 }
1118 return 0;
1119 }
1120#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001121
Fredrik Lundh3562f112000-07-02 12:00:07 +00001122 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001123 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001124 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001125 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001126 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001127 for (;;) {
1128 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1129 ptr++;
1130 if (ptr == end)
1131 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001132 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001133 state->start = ptr;
1134 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001135 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001136 if (status != 0)
1137 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001138 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001139 } else if (charset) {
1140 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001141 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001142 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001143 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001144 ptr++;
1145 if (ptr == end)
1146 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001147 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001148 state->start = ptr;
1149 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001150 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001151 if (status != 0)
1152 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001153 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001154 }
1155 } else
1156 /* general case */
1157 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001158 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001159 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001160 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001161 if (status != 0)
1162 break;
1163 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001164
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001165 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001166}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001167
Guido van Rossumb700df92000-03-31 14:59:30 +00001168
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001169#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001170
1171/* -------------------------------------------------------------------- */
1172/* factories and destructors */
1173
1174/* see sre.h for object declarations */
1175
1176staticforward PyTypeObject Pattern_Type;
1177staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001178staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001179
1180static PyObject *
1181_compile(PyObject* self_, PyObject* args)
1182{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001183 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001184
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001185 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001186 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001187
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001188 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001189 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001190 PyObject* code;
1191 int groups = 0;
1192 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001193 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001194 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001195 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001196 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001197
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001198 code = PySequence_Fast(code, "code argument must be a sequence");
1199 if (!code)
1200 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001201
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001202#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001203 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001204#else
1205 n = PySequence_Length(code);
1206#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001207
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001208 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1209 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001210 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001211 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001212 }
1213
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001214 for (i = 0; i < n; i++) {
1215 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001216 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001217 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001218
1219 Py_DECREF(code);
1220
1221 if (PyErr_Occurred())
1222 return NULL;
1223
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001224 Py_INCREF(pattern);
1225 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001226
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001227 self->flags = flags;
1228
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001229 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001230
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001231 Py_XINCREF(groupindex);
1232 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001233
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001234 Py_XINCREF(indexgroup);
1235 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001236
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001237 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001238}
1239
1240static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001241sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001242{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001243 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001244}
1245
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001246static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001247sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001248{
1249 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001250 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001251 return NULL;
1252 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001253 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001254#if defined(HAVE_UNICODE)
1255 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001256 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001257#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001258 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001259}
1260
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001261LOCAL(void)
1262state_reset(SRE_STATE* state)
1263{
1264 int i;
1265
1266 state->lastmark = 0;
1267
1268 /* FIXME: dynamic! */
1269 for (i = 0; i < SRE_MARK_SIZE; i++)
1270 state->mark[i] = NULL;
1271
1272 state->lastindex = -1;
1273
1274 state->repeat = NULL;
1275
1276 mark_fini(state);
1277}
1278
Guido van Rossumb700df92000-03-31 14:59:30 +00001279LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001280state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1281 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001282{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001283 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001284
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001285 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001286 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001287 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001288
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001289 memset(state, 0, sizeof(SRE_STATE));
1290
1291 state->lastindex = -1;
1292
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001293 /* get pointer to string buffer */
1294 buffer = string->ob_type->tp_as_buffer;
1295 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1296 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001297 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001298 return NULL;
1299 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001300
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001301 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001302 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1303 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001304 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1305 return NULL;
1306 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001307
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001308 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001309
1310#if PY_VERSION_HEX >= 0x01060000
1311 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001312#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001313 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001314#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001315
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001316 if (PyString_Check(string) || bytes == size)
1317 state->charsize = 1;
1318#if defined(HAVE_UNICODE)
1319 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1320 state->charsize = sizeof(Py_UNICODE);
1321#endif
1322 else {
1323 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1324 return NULL;
1325 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001326
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001327 /* adjust boundaries */
1328 if (start < 0)
1329 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001330 else if (start > size)
1331 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001332
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001333 if (end < 0)
1334 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001335 else if (end > size)
1336 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001337
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001338 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001339
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001340 state->start = (void*) ((char*) ptr + start * state->charsize);
1341 state->end = (void*) ((char*) ptr + end * state->charsize);
1342
1343 Py_INCREF(string);
1344 state->string = string;
1345 state->pos = start;
1346 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001347
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001348 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001349 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001350#if defined(HAVE_UNICODE)
1351 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001352 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001353#endif
1354 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001355 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001358}
1359
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001360LOCAL(void)
1361state_fini(SRE_STATE* state)
1362{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001363 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001364 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001365}
1366
1367LOCAL(PyObject*)
1368state_getslice(SRE_STATE* state, int index, PyObject* string)
1369{
Fredrik Lundh58100642000-08-09 09:14:35 +00001370 int i, j;
1371
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001372 index = (index - 1) * 2;
1373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001374 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001375 i = j = 0;
1376 } else {
1377 i = ((char*)state->mark[index] - (char*)state->beginning) /
1378 state->charsize;
1379 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1380 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001381 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001382
Fredrik Lundh58100642000-08-09 09:14:35 +00001383 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001384}
1385
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001386static void
1387pattern_error(int status)
1388{
1389 switch (status) {
1390 case SRE_ERROR_RECURSION_LIMIT:
1391 PyErr_SetString(
1392 PyExc_RuntimeError,
1393 "maximum recursion limit exceeded"
1394 );
1395 break;
1396 case SRE_ERROR_MEMORY:
1397 PyErr_NoMemory();
1398 break;
1399 default:
1400 /* other error codes indicate compiler/engine bugs */
1401 PyErr_SetString(
1402 PyExc_RuntimeError,
1403 "internal error in regular expression engine"
1404 );
1405 }
1406}
1407
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001408static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001409pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001410{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001411 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001413 MatchObject* match;
1414 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001415 char* base;
1416 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001417
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001418 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001420 /* create match object (with room for extra group marks) */
1421 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001422 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001423 if (!match)
1424 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001425
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001426 Py_INCREF(pattern);
1427 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001429 Py_INCREF(state->string);
1430 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001432 match->regs = NULL;
1433 match->groups = pattern->groups+1;
1434
1435 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001436
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001437 base = (char*) state->beginning;
1438 n = state->charsize;
1439
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001440 match->mark[0] = ((char*) state->start - base) / n;
1441 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001442
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001443 for (i = j = 0; i < pattern->groups; i++, j+=2)
1444 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1445 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1446 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1447 } else
1448 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1449
1450 match->pos = state->pos;
1451 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001452
Fredrik Lundh6f013982000-07-03 18:44:21 +00001453 match->lastindex = state->lastindex;
1454
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001455 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001456
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001457 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001458
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001459 /* no match */
1460 Py_INCREF(Py_None);
1461 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001462
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001463 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001464
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001465 /* internal error */
1466 pattern_error(status);
1467 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001468}
1469
1470static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001471pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001472{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001473 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001474
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001475 ScannerObject* self;
1476
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001477 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001478 int start = 0;
1479 int end = INT_MAX;
1480 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1481 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001482
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001483 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001484 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001485 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001486 return NULL;
1487
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001488 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001489 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001490 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001491 return NULL;
1492 }
1493
1494 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001495 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001496
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001497 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001498}
1499
Guido van Rossumb700df92000-03-31 14:59:30 +00001500static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001501pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001502{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001503 Py_XDECREF(self->pattern);
1504 Py_XDECREF(self->groupindex);
1505 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001506}
1507
1508static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001509pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001510{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001511 SRE_STATE state;
1512 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001513
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001514 PyObject* string;
1515 int start = 0;
1516 int end = INT_MAX;
1517 if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
1518 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001519
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001520 string = state_init(&state, self, string, start, end);
1521 if (!string)
1522 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001523
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001524 state.ptr = state.start;
1525
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001526 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1527
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001528 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001529 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001530 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001531#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001532 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001533#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001534 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001535
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001536 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1537
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001538 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001539
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001540 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001541}
1542
1543static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001544pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001545{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001546 SRE_STATE state;
1547 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001548
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001549 PyObject* string;
1550 int start = 0;
1551 int end = INT_MAX;
1552 if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
1553 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001554
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001555 string = state_init(&state, self, string, start, end);
1556 if (!string)
1557 return NULL;
1558
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001559 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1560
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001561 if (state.charsize == 1) {
1562 status = sre_search(&state, PatternObject_GetCode(self));
1563 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001564#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001565 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001566#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001567 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001568
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001569 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1570
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001571 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001572
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001573 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001574}
1575
1576static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001577call(char* function, PyObject* args)
1578{
1579 PyObject* name;
1580 PyObject* module;
1581 PyObject* func;
1582 PyObject* result;
1583
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001584 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001585 if (!name)
1586 return NULL;
1587 module = PyImport_Import(name);
1588 Py_DECREF(name);
1589 if (!module)
1590 return NULL;
1591 func = PyObject_GetAttrString(module, function);
1592 Py_DECREF(module);
1593 if (!func)
1594 return NULL;
1595 result = PyObject_CallObject(func, args);
1596 Py_DECREF(func);
1597 Py_DECREF(args);
1598 return result;
1599}
1600
1601static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001602pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001603{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001604 PyObject* template;
1605 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001606 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001607 if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
1608 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001609
1610 /* delegate to Python code */
1611 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1612}
1613
1614static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001615pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001616{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001617 PyObject* template;
1618 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001619 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001620 if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
1621 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001622
1623 /* delegate to Python code */
1624 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1625}
1626
1627static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001628pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001629{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001630 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001631 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001632 if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
1633 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001634
1635 /* delegate to Python code */
1636 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1637}
1638
1639static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001640pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001641{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001642 SRE_STATE state;
1643 PyObject* list;
1644 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001645 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001646
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001647 PyObject* string;
1648 int start = 0;
1649 int end = INT_MAX;
1650 if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
1651 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001652
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001653 string = state_init(&state, self, string, start, end);
1654 if (!string)
1655 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001656
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001657 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001658
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001659 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001660
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001661 PyObject* item;
1662
1663 state.ptr = state.start;
1664
1665 if (state.charsize == 1) {
1666 status = sre_search(&state, PatternObject_GetCode(self));
1667 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001668#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001669 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001670#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001671 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001672
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001673 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001674
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001675 /* don't bother to build a match object */
1676 switch (self->groups) {
1677 case 0:
1678 item = PySequence_GetSlice(
1679 string,
1680 ((char*) state.start - (char*) state.beginning) /
1681 state.charsize,
1682 ((char*) state.ptr - (char*) state.beginning) /
1683 state.charsize);
1684 if (!item)
1685 goto error;
1686 break;
1687 case 1:
1688 item = state_getslice(&state, 1, string);
1689 if (!item)
1690 goto error;
1691 break;
1692 default:
1693 item = PyTuple_New(self->groups);
1694 if (!item)
1695 goto error;
1696 for (i = 0; i < self->groups; i++) {
1697 PyObject* o = state_getslice(&state, i+1, string);
1698 if (!o) {
1699 Py_DECREF(item);
1700 goto error;
1701 }
1702 PyTuple_SET_ITEM(item, i, o);
1703 }
1704 break;
1705 }
1706
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001707 status = PyList_Append(list, item);
1708 Py_DECREF(item);
1709
1710 if (status < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001711 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001712
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001713 if (state.ptr == state.start)
1714 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001715 else
1716 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001717
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001718 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001719
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001720 if (status == 0)
1721 break;
1722
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001723 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001724 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001725
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001726 }
1727 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001728
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001729 state_fini(&state);
1730 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001731
1732error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001733 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001734 state_fini(&state);
1735 return NULL;
1736
Guido van Rossumb700df92000-03-31 14:59:30 +00001737}
1738
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001739static PyMethodDef pattern_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001740 {"match", (PyCFunction) pattern_match, 1},
1741 {"search", (PyCFunction) pattern_search, 1},
1742 {"sub", (PyCFunction) pattern_sub, 1},
1743 {"subn", (PyCFunction) pattern_subn, 1},
1744 {"split", (PyCFunction) pattern_split, 1},
1745 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001746 /* experimental */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001747 {"scanner", (PyCFunction) pattern_scanner, 1},
1748 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001749};
1750
1751static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001752pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001753{
1754 PyObject* res;
1755
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001756 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001757
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001758 if (res)
1759 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001760
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001761 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001762
1763 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001764 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001765 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001766 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001767 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001768
1769 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001770 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001771
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001772 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001773 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001774
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001775 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001776 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001777 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001778 }
1779
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001780 PyErr_SetString(PyExc_AttributeError, name);
1781 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001782}
1783
1784statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001785 PyObject_HEAD_INIT(NULL)
1786 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001787 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001788 (destructor)pattern_dealloc, /*tp_dealloc*/
1789 0, /*tp_print*/
1790 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001791};
1792
1793/* -------------------------------------------------------------------- */
1794/* match methods */
1795
1796static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001797match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001798{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001799 Py_XDECREF(self->regs);
1800 Py_XDECREF(self->string);
1801 Py_DECREF(self->pattern);
1802 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001803}
1804
1805static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001806match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001807{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001808 if (index < 0 || index >= self->groups) {
1809 /* raise IndexError if we were given a bad group number */
1810 PyErr_SetString(
1811 PyExc_IndexError,
1812 "no such group"
1813 );
1814 return NULL;
1815 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001816
Fredrik Lundh6f013982000-07-03 18:44:21 +00001817 index *= 2;
1818
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001819 if (self->string == Py_None || self->mark[index] < 0) {
1820 /* return default value if the string or group is undefined */
1821 Py_INCREF(def);
1822 return def;
1823 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001824
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001825 return PySequence_GetSlice(
1826 self->string, self->mark[index], self->mark[index+1]
1827 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001828}
1829
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001830static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001831match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001832{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001833 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001834
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001835 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001836 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001837
Fredrik Lundh6f013982000-07-03 18:44:21 +00001838 i = -1;
1839
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001840 if (self->pattern->groupindex) {
1841 index = PyObject_GetItem(self->pattern->groupindex, index);
1842 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001843 if (PyInt_Check(index))
1844 i = (int) PyInt_AS_LONG(index);
1845 Py_DECREF(index);
1846 } else
1847 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001848 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001849
1850 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001851}
1852
1853static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001854match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001855{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001856 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001857}
1858
1859static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001860match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001861{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001862 PyObject* result;
1863 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001864
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001865 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001866
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001867 switch (size) {
1868 case 0:
1869 result = match_getslice(self, Py_False, Py_None);
1870 break;
1871 case 1:
1872 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1873 break;
1874 default:
1875 /* fetch multiple items */
1876 result = PyTuple_New(size);
1877 if (!result)
1878 return NULL;
1879 for (i = 0; i < size; i++) {
1880 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001881 self, PyTuple_GET_ITEM(args, i), Py_None
1882 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001883 if (!item) {
1884 Py_DECREF(result);
1885 return NULL;
1886 }
1887 PyTuple_SET_ITEM(result, i, item);
1888 }
1889 break;
1890 }
1891 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001892}
1893
1894static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001895match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001896{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001897 PyObject* result;
1898 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001899
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001900 PyObject* def = Py_None;
1901 if (!PyArg_ParseTuple(args, "|O:groups", &def))
1902 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001903
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001904 result = PyTuple_New(self->groups-1);
1905 if (!result)
1906 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001907
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001908 for (index = 1; index < self->groups; index++) {
1909 PyObject* item;
1910 item = match_getslice_by_index(self, index, def);
1911 if (!item) {
1912 Py_DECREF(result);
1913 return NULL;
1914 }
1915 PyTuple_SET_ITEM(result, index-1, item);
1916 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001917
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001918 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001919}
1920
1921static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001922match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001923{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001924 PyObject* result;
1925 PyObject* keys;
1926 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001927
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001928 PyObject* def = Py_None;
1929 if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
1930 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001931
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001932 result = PyDict_New();
1933 if (!result || !self->pattern->groupindex)
1934 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001935
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001936 keys = PyMapping_Keys(self->pattern->groupindex);
1937 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001938 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001939 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001940 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001941
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001942 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1943 PyObject* key;
1944 PyObject* item;
1945 key = PyList_GET_ITEM(keys, index);
1946 if (!key) {
1947 Py_DECREF(keys);
1948 Py_DECREF(result);
1949 return NULL;
1950 }
1951 item = match_getslice(self, key, def);
1952 if (!item) {
1953 Py_DECREF(key);
1954 Py_DECREF(keys);
1955 Py_DECREF(result);
1956 return NULL;
1957 }
1958 /* FIXME: <fl> this can fail, right? */
1959 PyDict_SetItem(result, key, item);
1960 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001961
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001962 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001963
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001964 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001965}
1966
1967static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001968match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001969{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001970 int index;
1971
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001972 PyObject* index_ = Py_False; /* zero */
1973 if (!PyArg_ParseTuple(args, "|O:start", &index_))
1974 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001975
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001976 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001977
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001978 if (index < 0 || index >= self->groups) {
1979 PyErr_SetString(
1980 PyExc_IndexError,
1981 "no such group"
1982 );
1983 return NULL;
1984 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001985
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001986 if (self->mark[index*2] < 0) {
1987 Py_INCREF(Py_None);
1988 return Py_None;
1989 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001990
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001991 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00001992}
1993
1994static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001995match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001996{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001997 int index;
1998
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001999 PyObject* index_ = Py_False; /* zero */
2000 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2001 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002002
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002003 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002004
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002005 if (index < 0 || index >= self->groups) {
2006 PyErr_SetString(
2007 PyExc_IndexError,
2008 "no such group"
2009 );
2010 return NULL;
2011 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002012
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002013 if (self->mark[index*2] < 0) {
2014 Py_INCREF(Py_None);
2015 return Py_None;
2016 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002017
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002018 return Py_BuildValue("i", self->mark[index*2+1]);
2019}
2020
2021LOCAL(PyObject*)
2022_pair(int i1, int i2)
2023{
2024 PyObject* pair;
2025 PyObject* item;
2026
2027 pair = PyTuple_New(2);
2028 if (!pair)
2029 return NULL;
2030
2031 item = PyInt_FromLong(i1);
2032 if (!item)
2033 goto error;
2034 PyTuple_SET_ITEM(pair, 0, item);
2035
2036 item = PyInt_FromLong(i2);
2037 if (!item)
2038 goto error;
2039 PyTuple_SET_ITEM(pair, 1, item);
2040
2041 return pair;
2042
2043 error:
2044 Py_DECREF(pair);
2045 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002046}
2047
2048static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002049match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002050{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002051 int index;
2052
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002053 PyObject* index_ = Py_False; /* zero */
2054 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2055 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002056
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002057 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002058
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002059 if (index < 0 || index >= self->groups) {
2060 PyErr_SetString(
2061 PyExc_IndexError,
2062 "no such group"
2063 );
2064 return NULL;
2065 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002066
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002067 if (self->mark[index*2] < 0) {
2068 Py_INCREF(Py_None);
2069 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002070 return Py_BuildValue("OO", Py_None, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002071 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002072
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002073 return _pair(self->mark[index*2], self->mark[index*2+1]);
2074}
2075
2076static PyObject*
2077match_regs(MatchObject* self)
2078{
2079 PyObject* regs;
2080 PyObject* item;
2081 int index;
2082
2083 regs = PyTuple_New(self->groups);
2084 if (!regs)
2085 return NULL;
2086
2087 for (index = 0; index < self->groups; index++) {
2088 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2089 if (!item) {
2090 Py_DECREF(regs);
2091 return NULL;
2092 }
2093 PyTuple_SET_ITEM(regs, index, item);
2094 }
2095
2096 Py_INCREF(regs);
2097 self->regs = regs;
2098
2099 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002100}
2101
/* Method table for match objects; match_getattr searches it by name
   through Py_FindMethod. Each entry is {name, C function, flags}. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, 1},
    {"start", (PyCFunction) match_start, 1},
    {"end", (PyCFunction) match_end, 1},
    {"span", (PyCFunction) match_span, 1},
    {"groups", (PyCFunction) match_groups, 1},
    {"groupdict", (PyCFunction) match_groupdict, 1},
    {NULL, NULL} /* end-of-table marker */
};
2111
2112static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002113match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002114{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002115 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002116
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002117 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2118 if (res)
2119 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002120
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002122
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002123 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002124 if (self->lastindex >= 0)
2125 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002126 Py_INCREF(Py_None);
2127 return Py_None;
2128 }
2129
2130 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002131 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002132 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002133 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002134 );
2135 if (result)
2136 return result;
2137 PyErr_Clear();
2138 }
2139 Py_INCREF(Py_None);
2140 return Py_None;
2141 }
2142
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002143 if (!strcmp(name, "string")) {
2144 if (self->string) {
2145 Py_INCREF(self->string);
2146 return self->string;
2147 } else {
2148 Py_INCREF(Py_None);
2149 return Py_None;
2150 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002151 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002152
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 if (!strcmp(name, "regs")) {
2154 if (self->regs) {
2155 Py_INCREF(self->regs);
2156 return self->regs;
2157 } else
2158 return match_regs(self);
2159 }
2160
2161 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002162 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002164 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002165
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002166 if (!strcmp(name, "pos"))
2167 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002168
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002169 if (!strcmp(name, "endpos"))
2170 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002171
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002172 PyErr_SetString(PyExc_AttributeError, name);
2173 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002174}
2175
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2178
/* Type object for match objects. The type pointer is initialized to
   NULL here; init_sre sets Match_Type.ob_type to &PyType_Type when the
   module is loaded. */
statichere PyTypeObject Match_Type = {
    PyObject_HEAD_INIT(NULL)
    0, "SRE_Match", /* ob_size, tp_name */
    sizeof(MatchObject), sizeof(int), /* tp_basicsize, tp_itemsize */
    (destructor)match_dealloc, /*tp_dealloc*/
    0, /*tp_print*/
    (getattrfunc)match_getattr /*tp_getattr*/
};
2187
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002188/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002189/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002190
2191static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002192scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002193{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002194 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002195 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002196 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002197}
2198
2199static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002200scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002201{
2202 SRE_STATE* state = &self->state;
2203 PyObject* match;
2204 int status;
2205
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002206 state_reset(state);
2207
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002208 state->ptr = state->start;
2209
2210 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002211 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002212 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002213#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002214 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002215#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002216 }
2217
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002218 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002219 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002220
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002221 if (status == 0 || state->ptr == state->start)
2222 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002223 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002224 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002225
2226 return match;
2227}
2228
2229
2230static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002231scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002232{
2233 SRE_STATE* state = &self->state;
2234 PyObject* match;
2235 int status;
2236
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002237 state_reset(state);
2238
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002239 state->ptr = state->start;
2240
2241 if (state->charsize == 1) {
2242 status = sre_search(state, PatternObject_GetCode(self->pattern));
2243 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002244#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002245 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002246#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002247 }
2248
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002249 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002250 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002251
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002252 if (status == 0 || state->ptr == state->start)
2253 state->start = (void*) ((char*) state->ptr + state->charsize);
2254 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002255 state->start = state->ptr;
2256
2257 return match;
2258}
2259
/* Method table for scanner objects; scanner_getattr searches it by name
   through Py_FindMethod. Each entry is {name, C function, flags}. */
static PyMethodDef scanner_methods[] = {
    {"match", (PyCFunction) scanner_match, 0},
    {"search", (PyCFunction) scanner_search, 0},
    {NULL, NULL} /* end-of-table marker */
};
2265
2266static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002267scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002268{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002269 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002270
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002271 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2272 if (res)
2273 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002274
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002275 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002276
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002277 /* attributes */
2278 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002279 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002280 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002281 }
2282
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002283 PyErr_SetString(PyExc_AttributeError, name);
2284 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002285}
2286
/* Type object for scanner objects. The type pointer is initialized to
   NULL here; init_sre sets Scanner_Type.ob_type to &PyType_Type when
   the module is loaded. */
statichere PyTypeObject Scanner_Type = {
    PyObject_HEAD_INIT(NULL)
    0, "SRE_Scanner", /* ob_size, tp_name */
    sizeof(ScannerObject), 0, /* tp_basicsize, tp_itemsize */
    (destructor)scanner_dealloc, /*tp_dealloc*/
    0, /*tp_print*/
    (getattrfunc)scanner_getattr, /*tp_getattr*/
};
2295
/* Module-level function table, handed to Py_InitModule by init_sre.
   Each entry is {name, C function, flags}. */
static PyMethodDef _functions[] = {
    {"compile", _compile, 1},
    {"getcodesize", sre_codesize, 1},
    {"getlower", sre_getlower, 1},
    {NULL, NULL} /* end-of-table marker */
};
2302
2303void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002304#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002305__declspec(dllexport)
2306#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002307init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002308{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002309 /* Patch object types */
2310 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002311 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002312
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002313 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002314}
2315
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002316#endif /* !defined(SRE_RECURSIVE) */