blob: b0efc85d23ae61c84a7887f73eaf2ef57fb2dadb [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 99-10-24 fl created (based on existing template matcher code)
 * 99-11-13 fl added categories, branching, and more (0.2)
 * 99-11-16 fl some tweaks to compile on non-Windows platforms
 * 99-12-18 fl non-literals, generic maximizing repeat (0.3)
 * 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
 * 00-03-06 fl first alpha, sort of (0.5)
 * 00-03-14 fl removed most compatibility stuff (0.6)
 * 00-05-10 fl towards third alpha (0.8.2)
 * 00-05-13 fl added experimental scanner stuff (0.8.3)
 * 00-05-27 fl final bug hunt (0.8.4)
 * 00-06-21 fl less bugs, more taste (0.8.5)
 * 00-06-25 fl major changes to better deal with nested repeats (0.9)
 * 00-06-28 fl fixed findall (0.9.1)
 * 00-06-29 fl fixed split, added more scanner features (0.9.2)
 * 00-06-30 fl added fast search optimization (0.9.3)
 * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
 * 00-07-02 fl added charset optimizations, etc (0.9.5)
 * 00-07-03 fl store code in pattern object, lookbehind, etc
 * 00-07-08 fl added regs attribute
 * 00-07-18 fl changed branch operator to use failure stack
 * 00-07-21 fl reset lastindex in scanner methods (0.9.6)
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 2.0 integration and
 * other compatibility work.
 */
35
36#ifndef SRE_RECURSIVE
37
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000038char copyright[] = " SRE 0.9.6 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000039
40#include "Python.h"
41
42#include "sre.h"
43
Guido van Rossumb700df92000-03-31 14:59:30 +000044#if defined(HAVE_LIMITS_H)
45#include <limits.h>
46#else
47#define INT_MAX 2147483647
48#endif
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
53#define MODULE "sre"
54
Guido van Rossumb700df92000-03-31 14:59:30 +000055/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000056#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000057
Fredrik Lundh436c3d582000-06-29 08:58:44 +000058#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000059/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000060#define HAVE_UNICODE
61#endif
62
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000064/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000065
66/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000067#define USE_FAST_SEARCH
68
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000069/* enables aggressive inlining (always on for Visual C) */
70#define USE_INLINE
71
72/* -------------------------------------------------------------------- */
73
Fredrik Lundh80946112000-06-29 18:03:25 +000074#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000075#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000076#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000077/* fastest possible local call under MSVC */
78#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000079#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000080#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000081#else
82#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000083#endif
84
85/* error codes */
86#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
87#define SRE_ERROR_MEMORY -9 /* out of memory */
88
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000089#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000090#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000091#else
92#define TRACE(v)
93#endif
94
Fredrik Lundh436c3d582000-06-29 08:58:44 +000095#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Guido van Rossumb700df92000-03-31 14:59:30 +000096
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000097/* -------------------------------------------------------------------- */
98/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000099
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000100/* default character predicates (run sre_chars.py to regenerate tables) */
101
102#define SRE_DIGIT_MASK 1
103#define SRE_SPACE_MASK 2
104#define SRE_LINEBREAK_MASK 4
105#define SRE_ALNUM_MASK 8
106#define SRE_WORD_MASK 16
107
/* character class flags for the 128 ASCII code points, 16 entries per
   row.  each entry is an OR of the SRE_*_MASK bits:
   2 = space, 6 = space + linebreak, 16 = word,
   24 = alnum + word, 25 = digit + alnum + word */
static char sre_char_info[128] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /* 0x60 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
115
/* lower-case mapping for the 128 ASCII code points, 16 entries per row.
   'A'..'Z' (65..90) map to 'a'..'z' (97..122); every other entry maps
   to itself */
static char sre_char_lower[128] = {
    /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /* 0x40 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
               93, 94, 95,
    /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
               124, 125, 126, 127
};
125
Fredrik Lundhb389df32000-06-29 12:48:37 +0000126static unsigned int sre_lower(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000127{
Fredrik Lundhb389df32000-06-29 12:48:37 +0000128 return ((ch) < 128 ? sre_char_lower[ch] : ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000129}
130
131#define SRE_IS_DIGIT(ch)\
132 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
133#define SRE_IS_SPACE(ch)\
134 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
135#define SRE_IS_LINEBREAK(ch)\
136 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
137#define SRE_IS_ALNUM(ch)\
138 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
139#define SRE_IS_WORD(ch)\
140 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000141
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000142/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000143
static unsigned int sre_lower_locale(unsigned int ch)
{
    /* lower-case a character using the C library's tolower().  only
       values below 256 are handed to tolower(); anything larger is
       returned unchanged */
    if (ch >= 256)
        return ch;
    return (unsigned int) tolower((int) ch);
}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000148#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
149#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
150#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
151#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
152#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
153
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000154/* unicode-specific character predicates */
155
156#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000157static unsigned int sre_lower_unicode(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000158{
159 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
160}
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000161#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
162#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
163#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
Fredrik Lundh22d25462000-07-01 17:50:59 +0000164#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000165#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000166#endif
167
Guido van Rossumb700df92000-03-31 14:59:30 +0000168LOCAL(int)
169sre_category(SRE_CODE category, unsigned int ch)
170{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000172
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000173 case SRE_CATEGORY_DIGIT:
174 return SRE_IS_DIGIT(ch);
175 case SRE_CATEGORY_NOT_DIGIT:
176 return !SRE_IS_DIGIT(ch);
177 case SRE_CATEGORY_SPACE:
178 return SRE_IS_SPACE(ch);
179 case SRE_CATEGORY_NOT_SPACE:
180 return !SRE_IS_SPACE(ch);
181 case SRE_CATEGORY_WORD:
182 return SRE_IS_WORD(ch);
183 case SRE_CATEGORY_NOT_WORD:
184 return !SRE_IS_WORD(ch);
185 case SRE_CATEGORY_LINEBREAK:
186 return SRE_IS_LINEBREAK(ch);
187 case SRE_CATEGORY_NOT_LINEBREAK:
188 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000189
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000190 case SRE_CATEGORY_LOC_WORD:
191 return SRE_LOC_IS_WORD(ch);
192 case SRE_CATEGORY_LOC_NOT_WORD:
193 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000194
195#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000196 case SRE_CATEGORY_UNI_DIGIT:
197 return SRE_UNI_IS_DIGIT(ch);
198 case SRE_CATEGORY_UNI_NOT_DIGIT:
199 return !SRE_UNI_IS_DIGIT(ch);
200 case SRE_CATEGORY_UNI_SPACE:
201 return SRE_UNI_IS_SPACE(ch);
202 case SRE_CATEGORY_UNI_NOT_SPACE:
203 return !SRE_UNI_IS_SPACE(ch);
204 case SRE_CATEGORY_UNI_WORD:
205 return SRE_UNI_IS_WORD(ch);
206 case SRE_CATEGORY_UNI_NOT_WORD:
207 return !SRE_UNI_IS_WORD(ch);
208 case SRE_CATEGORY_UNI_LINEBREAK:
209 return SRE_UNI_IS_LINEBREAK(ch);
210 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
211 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000212#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000213 }
214 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000215}
216
217/* helpers */
218
219LOCAL(int)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000220stack_free(SRE_STATE* state)
Guido van Rossumb700df92000-03-31 14:59:30 +0000221{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000222 if (state->stack) {
223 TRACE(("release stack\n"));
224 free(state->stack);
225 state->stack = NULL;
226 }
227 state->stacksize = 0;
228 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000229}
230
231static int /* shouldn't be LOCAL */
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000232stack_extend(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000233{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000234 SRE_STACK* stack;
235 int stacksize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000236
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000237 /* grow the stack to a suitable size; we need at least lo entries,
238 at most hi entries. if for some reason hi is lower than lo, lo
239 wins */
Guido van Rossumb700df92000-03-31 14:59:30 +0000240
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000241 stacksize = state->stacksize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000242
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000243 if (stacksize == 0) {
244 /* create new stack */
245 stacksize = 512;
246 if (stacksize < lo)
247 stacksize = lo;
248 else if (stacksize > hi)
249 stacksize = hi;
250 TRACE(("allocate stack %d\n", stacksize));
251 stack = malloc(sizeof(SRE_STACK) * stacksize);
252 } else {
253 /* grow the stack (typically by a factor of two) */
254 while (stacksize < lo)
255 stacksize = 2 * stacksize;
256 /* FIXME: <fl> could trim size if it's much larger than hi,
Fredrik Lundh28552902000-07-05 21:14:16 +0000257 as long it's larger than lo */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000258 TRACE(("grow stack to %d\n", stacksize));
259 stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
260 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000261
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000262 if (!stack) {
263 stack_free(state);
264 return SRE_ERROR_MEMORY;
265 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000266
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000267 state->stack = stack;
268 state->stacksize = stacksize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000269
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000270 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000271}
272
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000273/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000274
275#define SRE_CHAR unsigned char
276#define SRE_AT sre_at
277#define SRE_MEMBER sre_member
278#define SRE_MATCH sre_match
279#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000280
281#if defined(HAVE_UNICODE)
282
Guido van Rossumb700df92000-03-31 14:59:30 +0000283#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000284#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000285#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000286
Guido van Rossumb700df92000-03-31 14:59:30 +0000287#undef SRE_SEARCH
288#undef SRE_MATCH
289#undef SRE_MEMBER
290#undef SRE_AT
291#undef SRE_CHAR
292
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000293/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000294
295#define SRE_CHAR Py_UNICODE
296#define SRE_AT sre_uat
297#define SRE_MEMBER sre_umember
298#define SRE_MATCH sre_umatch
299#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000300#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000301
302#endif /* SRE_RECURSIVE */
303
304/* -------------------------------------------------------------------- */
305/* String matching engine */
306
307/* the following section is compiled twice, with different character
308 settings */
309
310LOCAL(int)
311SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
312{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000313 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000314
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000315 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000316
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000317 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000318
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000319 case SRE_AT_BEGINNING:
320 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000321
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000322 case SRE_AT_BEGINNING_LINE:
323 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000324 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000325
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000326 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000327 return (((void*) (ptr+1) == state->end &&
328 SRE_IS_LINEBREAK((int) ptr[0])) ||
329 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000330
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000331 case SRE_AT_END_LINE:
332 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000333 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000334
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000335 case SRE_AT_BOUNDARY:
336 if (state->beginning == state->end)
337 return 0;
338 that = ((void*) ptr > state->beginning) ?
339 SRE_IS_WORD((int) ptr[-1]) : 0;
340 this = ((void*) ptr < state->end) ?
341 SRE_IS_WORD((int) ptr[0]) : 0;
342 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000343
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000344 case SRE_AT_NON_BOUNDARY:
345 if (state->beginning == state->end)
346 return 0;
347 that = ((void*) ptr > state->beginning) ?
348 SRE_IS_WORD((int) ptr[-1]) : 0;
349 this = ((void*) ptr < state->end) ?
350 SRE_IS_WORD((int) ptr[0]) : 0;
351 return this == that;
352 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000353
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000354 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000355}
356
357LOCAL(int)
Fredrik Lundh0640e112000-06-30 13:55:15 +0000358SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000359{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000360 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000361
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000362 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000363
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000364 for (;;) {
365 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000366
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000367 case SRE_OP_NEGATE:
368 ok = !ok;
369 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000370
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000371 case SRE_OP_FAILURE:
372 return !ok;
Guido van Rossumb700df92000-03-31 14:59:30 +0000373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000374 case SRE_OP_LITERAL:
Fredrik Lundhc13222c2000-07-01 23:49:14 +0000375 /* args: <literal> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000376 if (ch == set[0])
377 return ok;
378 set++;
379 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000380
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000381 case SRE_OP_RANGE:
Fredrik Lundhc13222c2000-07-01 23:49:14 +0000382 /* args: <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000383 if (set[0] <= ch && ch <= set[1])
384 return ok;
385 set += 2;
386 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000387
Fredrik Lundh3562f112000-07-02 12:00:07 +0000388 case SRE_OP_CHARSET:
389 /* args: <bitmap> (16 bits per code word) */
390 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
391 return ok;
392 set += 16;
393 break;
394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 case SRE_OP_CATEGORY:
Fredrik Lundhc13222c2000-07-01 23:49:14 +0000396 /* args: <category> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 if (sre_category(set[0], (int) ch))
398 return ok;
399 set += 1;
400 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 default:
403 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000404 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000405 return 0;
406 }
407 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000408}
409
410LOCAL(int)
411SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
412{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 /* check if string matches the given pattern. returns -1 for
414 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000415
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000416 SRE_CHAR* end = state->end;
417 SRE_CHAR* ptr = state->ptr;
418 int stack;
419 int stackbase;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000420 int lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000421 int i, count;
Fredrik Lundh72b82ba2000-07-03 21:31:48 +0000422 SRE_STACK* sp;
Guido van Rossumb700df92000-03-31 14:59:30 +0000423
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000424 /* FIXME: this is a hack! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000425 void* mark_copy[SRE_MARK_SIZE];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000426 void* mark = NULL;
427
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000428#define PUSH(skip_, mark_, max_)\
429 if (stack >= state->stacksize) {\
430 i = stack_extend(state, stack + 1, stackbase + max_);\
431 if (i < 0)\
432 return i;\
433 }\
434 TRACE(("%8d: stack[%d]\n", PTR(ptr), stack));\
435 sp = state->stack + (stack++);\
436 sp->ptr = ptr;\
437 sp->pattern = pattern + skip_;\
438 sp->mark = mark_;\
439 if (mark_ != 65535) {\
440 sp->mark0 = state->mark[mark_];\
441 sp->mark1 = state->mark[mark_+1];\
442 TRACE((" mark %d %d %d\n", mark_, PTR(state->mark[mark_]),\
443 PTR(state->mark[mark_+1])));\
444 }\
445
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000446 TRACE(("%8d: enter\n", PTR(ptr)));
447
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000448 if (pattern[0] == SRE_OP_INFO) {
449 /* optimization info block */
450 /* args: <1=skip> <2=flags> <3=min> ... */
451 if (pattern[3] && (end - ptr) < pattern[3]) {
452 TRACE(("reject (got %d chars, need %d)\n",
453 (end - ptr), pattern[3]));
454 return 0;
455 }
456 pattern += pattern[1] + 1;
457 }
458
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000459 stackbase = stack = state->stackbase;
460 lastmark = state->lastmark;
461
462 retry:
Guido van Rossumb700df92000-03-31 14:59:30 +0000463
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000464 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000465
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000466 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000467
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000468 case SRE_OP_FAILURE:
469 /* immediate failure */
470 TRACE(("%8d: failure\n", PTR(ptr)));
471 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000473 case SRE_OP_SUCCESS:
474 /* end of pattern */
475 TRACE(("%8d: success\n", PTR(ptr)));
476 state->ptr = ptr;
477 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000478
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000479 case SRE_OP_AT:
480 /* match at given position */
481 /* args: <at> */
482 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
483 if (!SRE_AT(state, ptr, *pattern))
484 goto failure;
485 pattern++;
486 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000487
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000488 case SRE_OP_CATEGORY:
489 /* match at given category */
490 /* args: <category> */
491 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000492 *ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000493 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
494 goto failure;
495 TRACE(("%8d: category ok\n", PTR(ptr)));
496 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000497 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000498 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000499
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 case SRE_OP_LITERAL:
501 /* match literal string */
502 /* args: <code> */
503 TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
504 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
505 goto failure;
506 pattern++;
507 ptr++;
508 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000509
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000510 case SRE_OP_NOT_LITERAL:
511 /* match anything that is not literal character */
512 /* args: <code> */
513 TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
514 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
515 goto failure;
516 pattern++;
517 ptr++;
518 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000519
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000520 case SRE_OP_ANY:
521 /* match anything */
522 TRACE(("%8d: anything\n", PTR(ptr)));
523 if (ptr >= end)
524 goto failure;
525 ptr++;
526 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000527
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000528 case SRE_OP_IN:
529 /* match set member (or non_member) */
530 /* args: <skip> <set> */
531 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
532 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
533 goto failure;
534 pattern += pattern[0];
535 ptr++;
536 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000537
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000538 case SRE_OP_GROUP:
539 /* match backreference */
540 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
541 i = pattern[0];
542 {
543 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
544 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
545 if (!p || !e || e < p)
546 goto failure;
547 while (p < e) {
548 if (ptr >= end || *ptr != *p)
549 goto failure;
550 p++; ptr++;
551 }
552 }
553 pattern++;
554 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000555
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000556 case SRE_OP_GROUP_IGNORE:
557 /* match backreference */
558 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
559 i = pattern[0];
560 {
561 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
562 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
563 if (!p || !e || e < p)
564 goto failure;
565 while (p < e) {
566 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000567 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000568 goto failure;
569 p++; ptr++;
570 }
571 }
572 pattern++;
573 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000574
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 case SRE_OP_LITERAL_IGNORE:
576 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
577 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000578 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000579 goto failure;
580 pattern++;
581 ptr++;
582 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000583
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000584 case SRE_OP_NOT_LITERAL_IGNORE:
585 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
586 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000587 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000588 goto failure;
589 pattern++;
590 ptr++;
591 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000592
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000593 case SRE_OP_IN_IGNORE:
594 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
595 if (ptr >= end
596 || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
597 goto failure;
598 pattern += pattern[0];
599 ptr++;
600 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000602 case SRE_OP_MARK:
603 /* set mark */
604 /* args: <mark> */
605 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
Fredrik Lundh72b82ba2000-07-03 21:31:48 +0000606 if (state->lastmark < pattern[0]+1)
607 state->lastmark = pattern[0]+1;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000608 if (!mark) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000609 mark = mark_copy;
610 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000611 }
612 state->mark[pattern[0]] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 pattern++;
614 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000615
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000616 case SRE_OP_INDEX:
617 /* set index */
618 /* args: <index> */
619 TRACE(("%8d: set index %d\n", PTR(ptr), pattern[0]));
Fredrik Lundh6f013982000-07-03 18:44:21 +0000620 state->lastindex = pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000621 pattern++;
622 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000623
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000624 case SRE_OP_JUMP:
625 case SRE_OP_INFO:
626 /* jump forward */
627 /* args: <skip> */
628 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
629 pattern += pattern[0];
630 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000631
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000632 case SRE_OP_ASSERT:
633 /* assert subpattern */
634 /* args: <skip> <back> <pattern> */
635 TRACE(("%8d: assert subpattern %d\n", PTR(ptr), pattern[1]));
636 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000637 if (state->ptr < state->beginning)
638 goto failure;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000639 i = SRE_MATCH(state, pattern + 2);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000640 if (i < 0)
641 return i;
642 if (!i)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000643 goto failure;
644 pattern += pattern[0];
645 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000646
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000647 case SRE_OP_ASSERT_NOT:
648 /* assert not subpattern */
649 /* args: <skip> <pattern> */
650 TRACE(("%8d: assert not subpattern %d\n", PTR(ptr), pattern[1]));
651 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000652 if (state->ptr < state->beginning)
653 goto failure;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 i = SRE_MATCH(state, pattern + 2);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000655 if (i < 0)
656 return i;
657 if (i)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000658 goto failure;
659 pattern += pattern[0];
660 break;
661
662 case SRE_OP_BRANCH:
663 /* try an alternate branch */
664 /* format: <branch> <0=skip> <1=mark> <tail...> */
665 TRACE(("%8d: branch\n", PTR(ptr)));
666 if (pattern[2] != SRE_OP_LITERAL ||
667 (ptr < end && (SRE_CODE) ptr[0] == pattern[3])) {
668 /* worth trying */
669 PUSH(pattern[0], pattern[3], 1);
670 pattern += 2;
671 } else
672 pattern += pattern[0];
673 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000674
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000675#if 0
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 case SRE_OP_MAX_REPEAT_ONE:
677 /* match repeated sequence (maximizing regexp) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000678
679 /* this operator only works if the repeated item is
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000680 exactly one character wide, and we're not already
681 collecting backtracking points. for other cases,
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000682 use the MAX_REPEAT operator instead */
683
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000684 /* args: <skip> <min> <max> <step> */
685 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
686 pattern[1], pattern[2]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000687
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000688 count = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000689
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000690 if (pattern[3] == SRE_OP_ANY) {
691 /* repeated wildcard. skip to the end of the target
692 string, and backtrack from there */
693 /* FIXME: must look for line endings */
694 if (ptr + pattern[1] > end)
695 goto failure; /* cannot match */
696 count = pattern[2];
697 if (count > end - ptr)
698 count = end - ptr;
699 ptr += count;
Guido van Rossumb700df92000-03-31 14:59:30 +0000700
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000701 } else if (pattern[3] == SRE_OP_LITERAL) {
702 /* repeated literal */
703 SRE_CODE chr = pattern[4];
704 while (count < (int) pattern[2]) {
705 if (ptr >= end || (SRE_CODE) ptr[0] != chr)
706 break;
707 ptr++;
708 count++;
709 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000710
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000711 } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
712 /* repeated literal */
713 SRE_CODE chr = pattern[4];
714 while (count < (int) pattern[2]) {
715 if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
716 break;
717 ptr++;
718 count++;
719 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000720
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000721 } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
722 /* repeated non-literal */
723 SRE_CODE chr = pattern[4];
724 while (count < (int) pattern[2]) {
725 if (ptr >= end || (SRE_CODE) ptr[0] == chr)
726 break;
727 ptr++;
728 count++;
729 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000730
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000731 } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
732 /* repeated non-literal */
733 SRE_CODE chr = pattern[4];
734 while (count < (int) pattern[2]) {
735 if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
736 break;
737 ptr++;
738 count++;
739 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000740
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 } else if (pattern[3] == SRE_OP_IN) {
742 /* repeated set */
743 while (count < (int) pattern[2]) {
744 if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
745 break;
746 ptr++;
747 count++;
748 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000749
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000750 } else {
751 /* repeated single character pattern */
752 state->ptr = ptr;
753 while (count < (int) pattern[2]) {
754 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000755 if (i < 0)
756 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 if (!i)
758 break;
759 count++;
760 }
761 state->ptr = ptr;
762 ptr += count;
763 }
764
765 /* when we arrive here, count contains the number of
766 matches, and ptr points to the tail of the target
767 string. check if the rest of the pattern matches, and
768 backtrack if not. */
769
770 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
771
772 if (count < (int) pattern[1])
773 goto failure;
774
775 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
776 /* tail is empty. we're finished */
777 TRACE(("%8d: tail is empty\n", PTR(ptr)));
778 state->ptr = ptr;
779 goto success;
780
781 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
782 /* tail starts with a literal. skip positions where
783 the rest of the pattern cannot possibly match */
784 SRE_CODE chr = pattern[pattern[0]+1];
785 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
786 for (;;) {
787 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
788 while (count >= (int) pattern[1] &&
789 (ptr >= end || *ptr != chr)) {
790 ptr--;
791 count--;
792 }
793 TRACE(("%8d: check tail\n", PTR(ptr)));
794 if (count < (int) pattern[1])
795 break;
796 state->ptr = ptr;
797 i = SRE_MATCH(state, pattern + pattern[0]);
798 if (i > 0) {
799 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
800 goto success;
801 }
802 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
803 ptr--;
804 count--;
805 }
806
807 } else {
808 /* general case */
809 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
810 while (count >= (int) pattern[1]) {
811 state->ptr = ptr;
812 i = SRE_MATCH(state, pattern + pattern[0]);
813 if (i < 0)
814 return i;
815 if (i) {
816 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
817 goto success;
818 }
819 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
820 ptr--;
821 count--;
822 }
823 }
824 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000825#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000826
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000827 case SRE_OP_MAX_REPEAT:
828 /* match repeated sequence (maximizing regexp) */
Fredrik Lundh72b82ba2000-07-03 21:31:48 +0000829 /* args: <skip> <1=min> <2=max> <3=save> <4=item> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000830
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000831 TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
832 pattern[1], pattern[2]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000833
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000834 count = 0;
835 state->ptr = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000836
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000837 /* match minimum number of items */
838 while (count < (int) pattern[1]) {
Fredrik Lundh72b82ba2000-07-03 21:31:48 +0000839 i = SRE_MATCH(state, pattern + 4);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000840 if (i < 0)
841 return i;
842 if (!i)
843 goto failure;
844 if (state->ptr == ptr) {
845 /* if the match was successful but empty, set the
846 count to max and terminate the scanning loop */
847 count = (int) pattern[2];
848 break;
849 }
850 count++;
851 ptr = state->ptr;
852 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000853
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000854 TRACE(("%8d: found %d leading items\n", PTR(ptr), count));
Guido van Rossumb700df92000-03-31 14:59:30 +0000855
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000856 if (count < (int) pattern[1])
857 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000858
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000859 /* match maximum number of items, pushing alternate end
860 points to the stack */
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000862 while (pattern[2] == 65535 || count < (int) pattern[2]) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000863 /* this position is valid; add it to the retry
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000864 stack */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000865 PUSH(pattern[0], pattern[3], pattern[2]);
866 /* match more stuff */
867 state->stackbase = stack;
868 i = SRE_MATCH(state, pattern + 4);
869 state->stackbase = stackbase;
Fredrik Lundh28552902000-07-05 21:14:16 +0000870 if (i < 0)
871 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000872 if (!i)
Fredrik Lundh28552902000-07-05 21:14:16 +0000873 break;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000874 if (state->ptr == ptr) {
875 count = (int) pattern[2];
876 break;
877 }
878 /* move forward */
879 ptr = state->ptr;
880 count++;
881 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000882
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000883 /* when we get here, count is the number of successful
884 matches, and ptr points to the tail. */
Guido van Rossumb700df92000-03-31 14:59:30 +0000885
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000886 TRACE(("%8d: skip +%d\n", PTR(ptr), pattern[0]));
887
888 pattern += pattern[0];
889 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000890
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000891 case SRE_OP_MIN_REPEAT:
892 /* match repeated sequence (minimizing regexp) */
Fredrik Lundh72b82ba2000-07-03 21:31:48 +0000893 /* args: <skip> <1=min> <2=max> <3=save> <4=item> */
894
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000895 TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
896 pattern[1], pattern[2]));
897 count = 0;
898 state->ptr = ptr;
899 /* match minimum number of items */
900 while (count < (int) pattern[1]) {
901 i = SRE_MATCH(state, pattern + 4);
902 if (i < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000903 return i;
904 if (!i)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000905 goto failure;
906 count++;
907 }
908 /* move forward until the tail matches. */
909 while (count <= (int) pattern[2]) {
910 ptr = state->ptr;
911 i = SRE_MATCH(state, pattern + pattern[0]);
912 if (i > 0) {
913 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
914 goto success;
915 }
916 state->ptr = ptr; /* backtrack */
917 i = SRE_MATCH(state, pattern + 4);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000918 if (i < 0)
919 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000920 if (!i)
921 goto failure;
922 count++;
923 }
924 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000925
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000926 case SRE_OP_REPEAT:
927 /* TEMPLATE: match repeated sequence (no backtracking) */
928 /* args: <skip> <min> <max> */
929 TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
930 count = 0;
931 state->ptr = ptr;
932 while (count < (int) pattern[2]) {
933 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000934 if (i < 0)
935 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000936 if (!i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000937 break;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000938 if (state->ptr == ptr) {
939 count = (int) pattern[2];
940 break;
941 }
942 count++;
943 }
944 if (count <= (int) pattern[1])
945 goto failure;
946 TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
947 pattern += pattern[0];
948 ptr = state->ptr;
949 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000950
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000951 default:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000952 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
953 return SRE_ERROR_ILLEGAL;
954 }
955 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000956
957 failure:
Fredrik Lundh72b82ba2000-07-03 21:31:48 +0000958 TRACE(("%8d: leave (failure)\n", PTR(ptr)));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000959 if (stack-- > stackbase) {
Fredrik Lundh28552902000-07-05 21:14:16 +0000960 TRACE(("%8d: pop stack[%d]\n", stack));
Fredrik Lundh72b82ba2000-07-03 21:31:48 +0000961 sp = state->stack + stack;
962 ptr = sp->ptr;
963 pattern = sp->pattern;
964 if (sp->mark != 65535) {
965 state->mark[sp->mark] = sp->mark0;
966 state->mark[sp->mark+1] = sp->mark1;
967 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000968 TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
969 goto retry;
970 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000971 state->lastmark = lastmark;
Fredrik Lundh72b82ba2000-07-03 21:31:48 +0000972 state->stackbase = stackbase;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000973 if (mark)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000974 memcpy(state->mark, mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000975 return 0;
976
977 success:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000978 TRACE(("%8d: leave (success)\n", PTR(ptr)));
979 state->stackbase = stackbase;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000980 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000981}
982
983LOCAL(int)
984SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
985{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000986 SRE_CHAR* ptr = state->start;
987 SRE_CHAR* end = state->end;
988 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +0000989 int prefix_len = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +0000990 SRE_CODE* prefix = NULL;
991 SRE_CODE* charset = NULL;
992 SRE_CODE* overlap = NULL;
993 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000994
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000995 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000996 /* optimization info block */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000997 /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
998
999 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001000
1001 if (pattern[3] > 0) {
1002 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001003 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001004 end -= pattern[3]-1;
1005 if (end <= ptr)
1006 end = ptr+1;
1007 }
1008
Fredrik Lundh3562f112000-07-02 12:00:07 +00001009 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001010 /* pattern starts with a known prefix */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001011 prefix_len = pattern[5];
1012 prefix = pattern + 6;
1013 overlap = prefix + prefix_len - 1;
1014 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001015 /* pattern starts with a character from a known set */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001016 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001017
1018 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001019 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001020
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001021#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001022 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001023 /* pattern starts with a known prefix. use the overlap
1024 table to skip forward as fast as we possibly can */
1025 int i = 0;
1026 end = state->end;
1027 while (ptr < end) {
1028 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001029 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001030 if (!i)
1031 break;
1032 else
1033 i = overlap[i];
1034 } else {
1035 if (++i == prefix_len) {
1036 /* found a potential match */
1037 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
1038 state->start = ptr - prefix_len + 1;
1039 state->ptr = ptr + 1;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001040 if (flags & SRE_INFO_LITERAL)
1041 return 1; /* we got all of it */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001042 status = SRE_MATCH(state, pattern + 2*prefix_len);
1043 if (status != 0)
1044 return status;
1045 /* close but no cigar -- try again */
1046 i = overlap[i];
1047 }
1048 break;
1049 }
1050
1051 }
1052 ptr++;
1053 }
1054 return 0;
1055 }
1056#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001057
Fredrik Lundh3562f112000-07-02 12:00:07 +00001058 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001059 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001060 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001061 SRE_CODE chr = pattern[1];
1062 for (;;) {
1063 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1064 ptr++;
1065 if (ptr == end)
1066 return 0;
1067 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
1068 state->start = ptr;
1069 state->ptr = ++ptr;
1070 status = SRE_MATCH(state, pattern + 2);
1071 if (status != 0)
1072 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001073 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001074 } else if (charset) {
1075 /* pattern starts with a character from a known set */
1076 for (;;) {
1077 while (ptr < end && !SRE_MEMBER(charset, ptr[0]))
1078 ptr++;
1079 if (ptr == end)
1080 return 0;
1081 TRACE(("%8d: === SEARCH === charset\n", PTR(ptr)));
1082 state->start = ptr;
1083 state->ptr = ptr;
1084 status = SRE_MATCH(state, pattern);
1085 if (status != 0)
1086 break;
1087 }
1088 } else
1089 /* general case */
1090 while (ptr <= end) {
1091 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
1092 state->start = state->ptr = ptr++;
1093 status = SRE_MATCH(state, pattern);
1094 if (status != 0)
1095 break;
1096 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001097
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001098 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001099}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001100
Guido van Rossumb700df92000-03-31 14:59:30 +00001101
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001102#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001103
1104/* -------------------------------------------------------------------- */
1105/* factories and destructors */
1106
1107/* see sre.h for object declarations */
1108
1109staticforward PyTypeObject Pattern_Type;
1110staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001111staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001112
1113static PyObject *
1114_compile(PyObject* self_, PyObject* args)
1115{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001116 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001117
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001118 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001119 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001120
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001121 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001122 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001123 PyObject* code;
1124 int groups = 0;
1125 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001126 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001127 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001128 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001129 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001130
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001131 code = PySequence_Fast(code, "code argument must be a sequence");
1132 if (!code)
1133 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001134
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001135#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001136 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001137#else
1138 n = PySequence_Length(code);
1139#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001140
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001141 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1142 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001143 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001144 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001145 }
1146
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001147 for (i = 0; i < n; i++) {
1148 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001149 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001150 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001151
1152 Py_DECREF(code);
1153
1154 if (PyErr_Occurred())
1155 return NULL;
1156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001157 Py_INCREF(pattern);
1158 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001159
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001160 self->flags = flags;
1161
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001162 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001163
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001164 Py_XINCREF(groupindex);
1165 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001166
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001167 Py_XINCREF(indexgroup);
1168 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001169
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001170 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001171}
1172
1173static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001174sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001175{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001176 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001177}
1178
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001179static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001180sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001181{
1182 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001183 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001184 return NULL;
1185 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001186 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001187#if defined(HAVE_UNICODE)
1188 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001189 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001190#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001191 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001192}
1193
Guido van Rossumb700df92000-03-31 14:59:30 +00001194LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001195state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1196 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001197{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001198 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001199
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001200 PyBufferProcs *buffer;
1201 int i, count;
1202 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001203
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001204 /* get pointer to string buffer */
1205 buffer = string->ob_type->tp_as_buffer;
1206 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1207 buffer->bf_getsegcount(string, NULL) != 1) {
1208 PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
1209 return NULL;
1210 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001211
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001212 /* determine buffer size */
1213 count = buffer->bf_getreadbuffer(string, 0, &ptr);
1214 if (count < 0) {
1215 /* sanity check */
1216 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1217 return NULL;
1218 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001219
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001220 /* determine character size */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001221#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001222 state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001223#else
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001224 state->charsize = 1;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001225#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001226
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001227 count /= state->charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001228
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001229 /* adjust boundaries */
1230 if (start < 0)
1231 start = 0;
1232 else if (start > count)
1233 start = count;
Guido van Rossumb700df92000-03-31 14:59:30 +00001234
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001235 if (end < 0)
1236 end = 0;
1237 else if (end > count)
1238 end = count;
Guido van Rossumb700df92000-03-31 14:59:30 +00001239
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001240 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001241
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 state->start = (void*) ((char*) ptr + start * state->charsize);
1243 state->end = (void*) ((char*) ptr + end * state->charsize);
1244
1245 Py_INCREF(string);
1246 state->string = string;
1247 state->pos = start;
1248 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001249
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001250 state->lastmark = 0;
1251
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001252 /* FIXME: dynamic! */
1253 for (i = 0; i < SRE_MARK_SIZE; i++)
1254 state->mark[i] = NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001255
Fredrik Lundh6f013982000-07-03 18:44:21 +00001256 state->lastindex = -1;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001257
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001258 state->stack = NULL;
1259 state->stackbase = 0;
1260 state->stacksize = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001261
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001262 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001263 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001264#if defined(HAVE_UNICODE)
1265 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001266 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001267#endif
1268 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001269 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001270
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001271 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001272}
1273
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001274LOCAL(void)
1275state_fini(SRE_STATE* state)
1276{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001277 Py_XDECREF(state->string);
1278 stack_free(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001279}
1280
1281LOCAL(PyObject*)
1282state_getslice(SRE_STATE* state, int index, PyObject* string)
1283{
1284 index = (index - 1) * 2;
1285
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001286 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1287 Py_INCREF(Py_None);
1288 return Py_None;
1289 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001290
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001291 return PySequence_GetSlice(
1292 string,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001293 ((char*)state->mark[index] - (char*)state->beginning) /
1294 state->charsize,
1295 ((char*)state->mark[index+1] - (char*)state->beginning) /
1296 state->charsize
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001297 );
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001298}
1299
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001300static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001301pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001302{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001303 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001304
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001305 MatchObject* match;
1306 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001307 char* base;
1308 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001309
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001310 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001311
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001312 /* create match object (with room for extra group marks) */
1313 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001314 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001315 if (!match)
1316 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001317
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001318 Py_INCREF(pattern);
1319 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001320
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001321 Py_INCREF(state->string);
1322 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001323
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001324 match->regs = NULL;
1325 match->groups = pattern->groups+1;
1326
1327 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001328
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001329 base = (char*) state->beginning;
1330 n = state->charsize;
1331
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001332 match->mark[0] = ((char*) state->start - base) / n;
1333 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001334
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001335 for (i = j = 0; i < pattern->groups; i++, j+=2)
1336 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1337 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1338 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1339 } else
1340 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1341
1342 match->pos = state->pos;
1343 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001344
Fredrik Lundh6f013982000-07-03 18:44:21 +00001345 match->lastindex = state->lastindex;
1346
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001347 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001348
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001349 } else if (status < 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001350
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001351 /* internal error */
1352 PyErr_SetString(
1353 PyExc_RuntimeError, "internal error in regular expression engine"
1354 );
1355 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001358
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001359 Py_INCREF(Py_None);
1360 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001361}
1362
1363static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001364pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001365{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001366 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001367
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001368 ScannerObject* self;
1369
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001370 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001371 int start = 0;
1372 int end = INT_MAX;
1373 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1374 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001375
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001376 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001377 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001378 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001379 return NULL;
1380
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001381 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001382 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001383 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001384 return NULL;
1385 }
1386
1387 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001388 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001389
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001390 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001391}
1392
Guido van Rossumb700df92000-03-31 14:59:30 +00001393static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001394pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001395{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001396 Py_XDECREF(self->pattern);
1397 Py_XDECREF(self->groupindex);
1398 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001399}
1400
1401static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001402pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001403{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001404 SRE_STATE state;
1405 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001407 PyObject* string;
1408 int start = 0;
1409 int end = INT_MAX;
1410 if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
1411 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001413 string = state_init(&state, self, string, start, end);
1414 if (!string)
1415 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001416
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001417 state.ptr = state.start;
1418
1419 if (state.charsize == 1) {
1420 status = sre_match(&state, PatternObject_GetCode(self));
1421 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001422#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001423 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001424#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001425 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001427 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001429 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001430}
1431
1432static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001433pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001434{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001435 SRE_STATE state;
1436 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001437
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001438 PyObject* string;
1439 int start = 0;
1440 int end = INT_MAX;
1441 if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
1442 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001443
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001444 string = state_init(&state, self, string, start, end);
1445 if (!string)
1446 return NULL;
1447
1448 if (state.charsize == 1) {
1449 status = sre_search(&state, PatternObject_GetCode(self));
1450 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001451#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001452 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001453#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001454 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001455
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001456 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001457
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001458 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001459}
1460
1461static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001462call(char* function, PyObject* args)
1463{
1464 PyObject* name;
1465 PyObject* module;
1466 PyObject* func;
1467 PyObject* result;
1468
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001469 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001470 if (!name)
1471 return NULL;
1472 module = PyImport_Import(name);
1473 Py_DECREF(name);
1474 if (!module)
1475 return NULL;
1476 func = PyObject_GetAttrString(module, function);
1477 Py_DECREF(module);
1478 if (!func)
1479 return NULL;
1480 result = PyObject_CallObject(func, args);
1481 Py_DECREF(func);
1482 Py_DECREF(args);
1483 return result;
1484}
1485
1486static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001487pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001488{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001489 PyObject* template;
1490 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001491 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001492 if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
1493 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001494
1495 /* delegate to Python code */
1496 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1497}
1498
1499static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001500pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001501{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001502 PyObject* template;
1503 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001504 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001505 if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
1506 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001507
1508 /* delegate to Python code */
1509 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1510}
1511
1512static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001513pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001514{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001515 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001516 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001517 if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
1518 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001519
1520 /* delegate to Python code */
1521 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1522}
1523
1524static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001525pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001526{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001527 SRE_STATE state;
1528 PyObject* list;
1529 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001530 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001531
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001532 PyObject* string;
1533 int start = 0;
1534 int end = INT_MAX;
1535 if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
1536 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001537
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001538 string = state_init(&state, self, string, start, end);
1539 if (!string)
1540 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001541
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001542 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001543
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001544 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001546 PyObject* item;
1547
1548 state.ptr = state.start;
1549
1550 if (state.charsize == 1) {
1551 status = sre_search(&state, PatternObject_GetCode(self));
1552 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001553#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001554 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001555#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001556 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001557
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001558 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001559
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001560 /* don't bother to build a match object */
1561 switch (self->groups) {
1562 case 0:
1563 item = PySequence_GetSlice(
1564 string,
1565 ((char*) state.start - (char*) state.beginning) /
1566 state.charsize,
1567 ((char*) state.ptr - (char*) state.beginning) /
1568 state.charsize);
1569 if (!item)
1570 goto error;
1571 break;
1572 case 1:
1573 item = state_getslice(&state, 1, string);
1574 if (!item)
1575 goto error;
1576 break;
1577 default:
1578 item = PyTuple_New(self->groups);
1579 if (!item)
1580 goto error;
1581 for (i = 0; i < self->groups; i++) {
1582 PyObject* o = state_getslice(&state, i+1, string);
1583 if (!o) {
1584 Py_DECREF(item);
1585 goto error;
1586 }
1587 PyTuple_SET_ITEM(item, i, o);
1588 }
1589 break;
1590 }
1591
1592 if (PyList_Append(list, item) < 0) {
1593 Py_DECREF(item);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001594 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001595 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001596
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001597 if (state.ptr == state.start)
1598 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001599 else
1600 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001602 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001603
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001604 if (status == 0)
1605 break;
1606
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001607 /* internal error */
1608 PyErr_SetString(
1609 PyExc_RuntimeError,
1610 "internal error in regular expression engine"
1611 );
1612 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001613
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001614 }
1615 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001616
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001617 state_fini(&state);
1618 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001619
1620error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001621 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001622 state_fini(&state);
1623 return NULL;
1624
Guido van Rossumb700df92000-03-31 14:59:30 +00001625}
1626
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001627static PyMethodDef pattern_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001628 {"match", (PyCFunction) pattern_match, 1},
1629 {"search", (PyCFunction) pattern_search, 1},
1630 {"sub", (PyCFunction) pattern_sub, 1},
1631 {"subn", (PyCFunction) pattern_subn, 1},
1632 {"split", (PyCFunction) pattern_split, 1},
1633 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001634 /* experimental */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001635 {"scanner", (PyCFunction) pattern_scanner, 1},
1636 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001637};
1638
1639static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001640pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001641{
1642 PyObject* res;
1643
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001644 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001645
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001646 if (res)
1647 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001648
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001649 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001650
1651 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001652 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001653 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001654 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001655 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001656
1657 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001658 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001659
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001660 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001661 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001662
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001663 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001664 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001665 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001666 }
1667
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001668 PyErr_SetString(PyExc_AttributeError, name);
1669 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001670}
1671
1672statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001673 PyObject_HEAD_INIT(NULL)
1674 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001675 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001676 (destructor)pattern_dealloc, /*tp_dealloc*/
1677 0, /*tp_print*/
1678 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001679};
1680
1681/* -------------------------------------------------------------------- */
1682/* match methods */
1683
1684static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001685match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001686{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001687 Py_XDECREF(self->regs);
1688 Py_XDECREF(self->string);
1689 Py_DECREF(self->pattern);
1690 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001691}
1692
1693static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001694match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001695{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001696 if (index < 0 || index >= self->groups) {
1697 /* raise IndexError if we were given a bad group number */
1698 PyErr_SetString(
1699 PyExc_IndexError,
1700 "no such group"
1701 );
1702 return NULL;
1703 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001704
Fredrik Lundh6f013982000-07-03 18:44:21 +00001705 index *= 2;
1706
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001707 if (self->string == Py_None || self->mark[index] < 0) {
1708 /* return default value if the string or group is undefined */
1709 Py_INCREF(def);
1710 return def;
1711 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001712
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001713 return PySequence_GetSlice(
1714 self->string, self->mark[index], self->mark[index+1]
1715 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001716}
1717
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001718static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001719match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001720{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001721 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001722
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001723 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001724 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001725
Fredrik Lundh6f013982000-07-03 18:44:21 +00001726 i = -1;
1727
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001728 if (self->pattern->groupindex) {
1729 index = PyObject_GetItem(self->pattern->groupindex, index);
1730 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001731 if (PyInt_Check(index))
1732 i = (int) PyInt_AS_LONG(index);
1733 Py_DECREF(index);
1734 } else
1735 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001736 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001737
1738 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001739}
1740
1741static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001742match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001743{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001744 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001745}
1746
1747static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001748match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001749{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001750 PyObject* result;
1751 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001753 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001755 switch (size) {
1756 case 0:
1757 result = match_getslice(self, Py_False, Py_None);
1758 break;
1759 case 1:
1760 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1761 break;
1762 default:
1763 /* fetch multiple items */
1764 result = PyTuple_New(size);
1765 if (!result)
1766 return NULL;
1767 for (i = 0; i < size; i++) {
1768 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001769 self, PyTuple_GET_ITEM(args, i), Py_None
1770 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001771 if (!item) {
1772 Py_DECREF(result);
1773 return NULL;
1774 }
1775 PyTuple_SET_ITEM(result, i, item);
1776 }
1777 break;
1778 }
1779 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001780}
1781
1782static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001783match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001784{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001785 PyObject* result;
1786 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001787
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001788 PyObject* def = Py_None;
1789 if (!PyArg_ParseTuple(args, "|O:groups", &def))
1790 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001791
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001792 result = PyTuple_New(self->groups-1);
1793 if (!result)
1794 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001795
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001796 for (index = 1; index < self->groups; index++) {
1797 PyObject* item;
1798 item = match_getslice_by_index(self, index, def);
1799 if (!item) {
1800 Py_DECREF(result);
1801 return NULL;
1802 }
1803 PyTuple_SET_ITEM(result, index-1, item);
1804 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001805
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001806 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001807}
1808
1809static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001810match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001811{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001812 PyObject* result;
1813 PyObject* keys;
1814 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001815
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001816 PyObject* def = Py_None;
1817 if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
1818 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001819
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001820 result = PyDict_New();
1821 if (!result || !self->pattern->groupindex)
1822 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001823
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001824 keys = PyMapping_Keys(self->pattern->groupindex);
1825 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001826 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001827 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001828 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001829
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001830 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1831 PyObject* key;
1832 PyObject* item;
1833 key = PyList_GET_ITEM(keys, index);
1834 if (!key) {
1835 Py_DECREF(keys);
1836 Py_DECREF(result);
1837 return NULL;
1838 }
1839 item = match_getslice(self, key, def);
1840 if (!item) {
1841 Py_DECREF(key);
1842 Py_DECREF(keys);
1843 Py_DECREF(result);
1844 return NULL;
1845 }
1846 /* FIXME: <fl> this can fail, right? */
1847 PyDict_SetItem(result, key, item);
1848 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001849
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001850 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001851
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001852 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001853}
1854
1855static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001856match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001857{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001858 int index;
1859
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001860 PyObject* index_ = Py_False; /* zero */
1861 if (!PyArg_ParseTuple(args, "|O:start", &index_))
1862 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001863
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001864 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001865
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001866 if (index < 0 || index >= self->groups) {
1867 PyErr_SetString(
1868 PyExc_IndexError,
1869 "no such group"
1870 );
1871 return NULL;
1872 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001873
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001874 if (self->mark[index*2] < 0) {
1875 Py_INCREF(Py_None);
1876 return Py_None;
1877 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001878
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001879 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00001880}
1881
1882static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001883match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001884{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001885 int index;
1886
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001887 PyObject* index_ = Py_False; /* zero */
1888 if (!PyArg_ParseTuple(args, "|O:end", &index_))
1889 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001890
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001891 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001892
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 if (index < 0 || index >= self->groups) {
1894 PyErr_SetString(
1895 PyExc_IndexError,
1896 "no such group"
1897 );
1898 return NULL;
1899 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001900
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001901 if (self->mark[index*2] < 0) {
1902 Py_INCREF(Py_None);
1903 return Py_None;
1904 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001905
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001906 return Py_BuildValue("i", self->mark[index*2+1]);
1907}
1908
1909LOCAL(PyObject*)
1910_pair(int i1, int i2)
1911{
1912 PyObject* pair;
1913 PyObject* item;
1914
1915 pair = PyTuple_New(2);
1916 if (!pair)
1917 return NULL;
1918
1919 item = PyInt_FromLong(i1);
1920 if (!item)
1921 goto error;
1922 PyTuple_SET_ITEM(pair, 0, item);
1923
1924 item = PyInt_FromLong(i2);
1925 if (!item)
1926 goto error;
1927 PyTuple_SET_ITEM(pair, 1, item);
1928
1929 return pair;
1930
1931 error:
1932 Py_DECREF(pair);
1933 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001934}
1935
1936static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001937match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001938{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001939 int index;
1940
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001941 PyObject* index_ = Py_False; /* zero */
1942 if (!PyArg_ParseTuple(args, "|O:span", &index_))
1943 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001944
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001945 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001946
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001947 if (index < 0 || index >= self->groups) {
1948 PyErr_SetString(
1949 PyExc_IndexError,
1950 "no such group"
1951 );
1952 return NULL;
1953 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001954
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001955 if (self->mark[index*2] < 0) {
1956 Py_INCREF(Py_None);
1957 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001958 return Py_BuildValue("OO", Py_None, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001959 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001960
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001961 return _pair(self->mark[index*2], self->mark[index*2+1]);
1962}
1963
1964static PyObject*
1965match_regs(MatchObject* self)
1966{
1967 PyObject* regs;
1968 PyObject* item;
1969 int index;
1970
1971 regs = PyTuple_New(self->groups);
1972 if (!regs)
1973 return NULL;
1974
1975 for (index = 0; index < self->groups; index++) {
1976 item = _pair(self->mark[index*2], self->mark[index*2+1]);
1977 if (!item) {
1978 Py_DECREF(regs);
1979 return NULL;
1980 }
1981 PyTuple_SET_ITEM(regs, index, item);
1982 }
1983
1984 Py_INCREF(regs);
1985 self->regs = regs;
1986
1987 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00001988}
1989
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001990static PyMethodDef match_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001991 {"group", (PyCFunction) match_group, 1},
1992 {"start", (PyCFunction) match_start, 1},
1993 {"end", (PyCFunction) match_end, 1},
1994 {"span", (PyCFunction) match_span, 1},
1995 {"groups", (PyCFunction) match_groups, 1},
1996 {"groupdict", (PyCFunction) match_groupdict, 1},
1997 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001998};
1999
2000static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002001match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002002{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002003 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002004
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002005 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2006 if (res)
2007 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002008
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002009 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002010
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002011 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002012 if (self->lastindex >= 0)
2013 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002014 Py_INCREF(Py_None);
2015 return Py_None;
2016 }
2017
2018 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002019 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002020 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002021 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002022 );
2023 if (result)
2024 return result;
2025 PyErr_Clear();
2026 }
2027 Py_INCREF(Py_None);
2028 return Py_None;
2029 }
2030
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002031 if (!strcmp(name, "string")) {
2032 if (self->string) {
2033 Py_INCREF(self->string);
2034 return self->string;
2035 } else {
2036 Py_INCREF(Py_None);
2037 return Py_None;
2038 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002039 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002040
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002041 if (!strcmp(name, "regs")) {
2042 if (self->regs) {
2043 Py_INCREF(self->regs);
2044 return self->regs;
2045 } else
2046 return match_regs(self);
2047 }
2048
2049 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002050 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002051 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002052 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002053
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 if (!strcmp(name, "pos"))
2055 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002056
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002057 if (!strcmp(name, "endpos"))
2058 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002059
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 PyErr_SetString(PyExc_AttributeError, name);
2061 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002062}
2063
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2066
2067statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002068 PyObject_HEAD_INIT(NULL)
2069 0, "SRE_Match",
2070 sizeof(MatchObject), sizeof(int),
2071 (destructor)match_dealloc, /*tp_dealloc*/
2072 0, /*tp_print*/
2073 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002074};
2075
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002076/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002077/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002078
2079static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002080scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002081{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002083 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002084 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002085}
2086
2087static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002088scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002089{
2090 SRE_STATE* state = &self->state;
2091 PyObject* match;
2092 int status;
2093
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002094 state->lastindex = -1;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002095 state->ptr = state->start;
2096
2097 if (state->charsize == 1) {
2098 status = sre_match(state, PatternObject_GetCode(self->pattern));
2099 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002100#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002101 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002102#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002103 }
2104
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002105 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002106 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002107
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002108 if (status == 0 || state->ptr == state->start)
2109 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002110 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002111 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002112
2113 return match;
2114}
2115
2116
2117static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002118scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002119{
2120 SRE_STATE* state = &self->state;
2121 PyObject* match;
2122 int status;
2123
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002124 state->lastindex = -1;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002125 state->ptr = state->start;
2126
2127 if (state->charsize == 1) {
2128 status = sre_search(state, PatternObject_GetCode(self->pattern));
2129 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002130#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002131 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002132#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002133 }
2134
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002135 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002137
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002138 if (status == 0 || state->ptr == state->start)
2139 state->start = (void*) ((char*) state->ptr + state->charsize);
2140 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002141 state->start = state->ptr;
2142
2143 return match;
2144}
2145
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002146static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 {"match", (PyCFunction) scanner_match, 0},
2148 {"search", (PyCFunction) scanner_search, 0},
2149 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002150};
2151
2152static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002153scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002154{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002155 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002157 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2158 if (res)
2159 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002160
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002161 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002162
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163 /* attributes */
2164 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002165 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002166 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002167 }
2168
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002169 PyErr_SetString(PyExc_AttributeError, name);
2170 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002171}
2172
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002173statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002174 PyObject_HEAD_INIT(NULL)
2175 0, "SRE_Scanner",
2176 sizeof(ScannerObject), 0,
2177 (destructor)scanner_dealloc, /*tp_dealloc*/
2178 0, /*tp_print*/
2179 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002180};
2181
Guido van Rossumb700df92000-03-31 14:59:30 +00002182static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002183 {"compile", _compile, 1},
2184 {"getcodesize", sre_codesize, 1},
2185 {"getlower", sre_getlower, 1},
2186 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002187};
2188
2189void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002190#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002191__declspec(dllexport)
2192#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002193init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002194{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002195 /* Patch object types */
2196 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002197 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002198
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002199 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002200}
2201
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002202#endif /* !defined(SRE_RECURSIVE) */