blob: 28ec61c21006830e55ab14c44784f2be268174df [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-06-30 fl added fast search optimization
 * 2000-06-30 fl added assert (lookahead) primitives, etc
 * 2000-07-02 fl added charset optimizations, etc
 * 2000-07-03 fl store code in pattern object, lookbehind, etc
 * 2000-07-08 fl added regs attribute
 * 2000-07-21 fl reset lastindex in scanner methods
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-03 fl added recursion limit
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-08-08 fl changed findall to return empty strings instead of None
 * 2000-08-27 fl properly propagate memory errors
 * 2000-09-02 fl return -1 instead of None for start/end/span
 * 2000-09-20 fl added expand method
 * 2000-09-21 fl don't use the buffer interface for unicode strings
 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl really fixed assert_not; reset groups in findall
 * 2000-12-21 fl fixed memory leak in groupdict
 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
38
39#ifndef SRE_RECURSIVE
40
/* banner string naming the engine version and the copyright holder */
char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000042
#include "Python.h"

#include "sre.h"

#include <ctype.h>
#include <limits.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000048
/* name of this module, minus the leading underscore */
#define MODULE "sre"

/* defining this one enables tracing */
#undef VERBOSE

#if PY_VERSION_HEX >= 0x01060000
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif
59
/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */

#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE
/* -------------------------------------------------------------------- */

/* LOCAL(type) introduces a file-local function returning `type`, using
   the cheapest calling convention / inlining hint the compiler offers */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
94
/* error codes */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
100
/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise; note the doubled parentheses */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
106
/* -------------------------------------------------------------------- */
/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000109
/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* per-character class bits for the 7-bit ASCII range, indexed by
   character code; each entry is an OR of the SRE_*_MASK values above
   (16 entries per row) */
static char sre_char_info[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
125
/* ASCII lower-case mapping, indexed by character code: 'A'..'Z' map to
   'a'..'z', every other code maps to itself (16 entries per row) */
static char sre_char_lower[128] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127 };

/* lower-case a character using the ASCII table above; codes of 128 and
   above are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? sre_char_lower[ch] : ch);
}
140
/* ASCII-only predicates: look the character up in sre_char_info and
   test one class bit.  The result is the mask bit itself (non-zero)
   rather than 1; codes of 128 and above never match. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000151
/* locale-specific character predicates */

/* lower-case a character through the C library's tolower(); codes of
   256 and above are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int) tolower((int) (ch)) : ch);
}
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
163
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)
/* lower-case a character through Py_UNICODE_TOLOWER; the argument is
   first narrowed to Py_UNICODE */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
177
Guido van Rossumb700df92000-03-31 14:59:30 +0000178LOCAL(int)
179sre_category(SRE_CODE category, unsigned int ch)
180{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000181 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000182
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000183 case SRE_CATEGORY_DIGIT:
184 return SRE_IS_DIGIT(ch);
185 case SRE_CATEGORY_NOT_DIGIT:
186 return !SRE_IS_DIGIT(ch);
187 case SRE_CATEGORY_SPACE:
188 return SRE_IS_SPACE(ch);
189 case SRE_CATEGORY_NOT_SPACE:
190 return !SRE_IS_SPACE(ch);
191 case SRE_CATEGORY_WORD:
192 return SRE_IS_WORD(ch);
193 case SRE_CATEGORY_NOT_WORD:
194 return !SRE_IS_WORD(ch);
195 case SRE_CATEGORY_LINEBREAK:
196 return SRE_IS_LINEBREAK(ch);
197 case SRE_CATEGORY_NOT_LINEBREAK:
198 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000199
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000200 case SRE_CATEGORY_LOC_WORD:
201 return SRE_LOC_IS_WORD(ch);
202 case SRE_CATEGORY_LOC_NOT_WORD:
203 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000204
205#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000206 case SRE_CATEGORY_UNI_DIGIT:
207 return SRE_UNI_IS_DIGIT(ch);
208 case SRE_CATEGORY_UNI_NOT_DIGIT:
209 return !SRE_UNI_IS_DIGIT(ch);
210 case SRE_CATEGORY_UNI_SPACE:
211 return SRE_UNI_IS_SPACE(ch);
212 case SRE_CATEGORY_UNI_NOT_SPACE:
213 return !SRE_UNI_IS_SPACE(ch);
214 case SRE_CATEGORY_UNI_WORD:
215 return SRE_UNI_IS_WORD(ch);
216 case SRE_CATEGORY_UNI_NOT_WORD:
217 return !SRE_UNI_IS_WORD(ch);
218 case SRE_CATEGORY_UNI_LINEBREAK:
219 return SRE_UNI_IS_LINEBREAK(ch);
220 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
221 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000222#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000223 }
224 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000225}
226
227/* helpers */
228
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000229static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000230mark_fini(SRE_STATE* state)
231{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000232 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000233 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000234 state->mark_stack = NULL;
235 }
236 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000237}
238
239static int
240mark_save(SRE_STATE* state, int lo, int hi)
241{
242 void* stack;
243 int size;
244 int minsize, newsize;
245
246 if (hi <= lo)
247 return 0;
248
249 size = (hi - lo) + 1;
250
251 newsize = state->mark_stack_size;
252 minsize = state->mark_stack_base + size;
253
254 if (newsize < minsize) {
255 /* create new stack */
256 if (!newsize) {
257 newsize = 512;
258 if (newsize < minsize)
259 newsize = minsize;
260 TRACE(("allocate stack %d\n", newsize));
261 stack = malloc(sizeof(void*) * newsize);
262 } else {
263 /* grow the stack */
264 while (newsize < minsize)
265 newsize += newsize;
266 TRACE(("grow stack to %d\n", newsize));
267 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
268 }
269 if (!stack) {
270 mark_fini(state);
271 return SRE_ERROR_MEMORY;
272 }
273 state->mark_stack = stack;
274 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000275 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000276
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000277 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278
279 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
280 size * sizeof(void*));
281
282 state->mark_stack_base += size;
283
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000284 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000285}
286
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000287static int
288mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000289{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000290 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000291
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000292 if (hi <= lo)
293 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000294
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000295 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000296
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000297 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000298
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000299 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000300
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000301 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
302 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000303
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000304 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000305}
306
/* generate 8-bit version */

/* the engine section of this file is written against the SRE_* names
   below.  when HAVE_UNICODE is defined, the file includes itself with
   SRE_RECURSIVE set, which compiles that section with the 8-bit names;
   the names are then redefined so the remainder of this file produces
   the unicode variants.  without HAVE_UNICODE the section is compiled
   once, with the 8-bit names. */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000341
342#endif /* SRE_RECURSIVE */
343
/* -------------------------------------------------------------------- */
/* String matching engine */

/* the following section is compiled twice, with different character
   settings */
349
350LOCAL(int)
351SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
352{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000354
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000355 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000357 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000358
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000359 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000360 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000361 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000362
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000363 case SRE_AT_BEGINNING_LINE:
364 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000365 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000366
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000367 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000368 return (((void*) (ptr+1) == state->end &&
369 SRE_IS_LINEBREAK((int) ptr[0])) ||
370 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000371
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000372 case SRE_AT_END_LINE:
373 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000374 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000375
Fredrik Lundh770617b2001-01-14 15:06:11 +0000376 case SRE_AT_END_STRING:
377 return ((void*) ptr == state->end);
378
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000379 case SRE_AT_BOUNDARY:
380 if (state->beginning == state->end)
381 return 0;
382 that = ((void*) ptr > state->beginning) ?
383 SRE_IS_WORD((int) ptr[-1]) : 0;
384 this = ((void*) ptr < state->end) ?
385 SRE_IS_WORD((int) ptr[0]) : 0;
386 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000387
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000388 case SRE_AT_NON_BOUNDARY:
389 if (state->beginning == state->end)
390 return 0;
391 that = ((void*) ptr > state->beginning) ?
392 SRE_IS_WORD((int) ptr[-1]) : 0;
393 this = ((void*) ptr < state->end) ?
394 SRE_IS_WORD((int) ptr[0]) : 0;
395 return this == that;
396 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000397
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000398 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000399}
400
401LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000402SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000403{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000405
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000406 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000407
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000408 for (;;) {
409 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000412 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 if (ch == set[0])
414 return ok;
415 set++;
416 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000417
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000418 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000419 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000420 if (set[0] <= ch && ch <= set[1])
421 return ok;
422 set += 2;
423 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000424
Fredrik Lundh3562f112000-07-02 12:00:07 +0000425 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000426 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000427 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
428 return ok;
429 set += 16;
430 break;
431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000432 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000433 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000434 if (sre_category(set[0], (int) ch))
435 return ok;
436 set += 1;
437 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000438
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000439 case SRE_OP_NEGATE:
440 ok = !ok;
441 break;
442
443 case SRE_OP_FAILURE:
444 return !ok;
445
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000446 default:
447 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000448 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000449 return 0;
450 }
451 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000452}
453
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000454LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
455
456LOCAL(int)
457SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
458{
459 SRE_CODE chr;
460 SRE_CHAR* ptr = state->ptr;
461 SRE_CHAR* end = state->end;
462 int i;
463
464 /* adjust end */
465 if (maxcount < end - ptr && maxcount != 65535)
466 end = ptr + maxcount;
467
468 switch (pattern[0]) {
469
470 case SRE_OP_ANY:
471 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000472 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000473 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
474 ptr++;
475 break;
476
477 case SRE_OP_ANY_ALL:
478 /* repeated dot wildcare. skip to the end of the target
479 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000480 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000481 ptr = end;
482 break;
483
484 case SRE_OP_LITERAL:
485 /* repeated literal */
486 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000487 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000488 while (ptr < end && (SRE_CODE) *ptr == chr)
489 ptr++;
490 break;
491
492 case SRE_OP_LITERAL_IGNORE:
493 /* repeated literal */
494 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000495 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000496 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
497 ptr++;
498 break;
499
500 case SRE_OP_NOT_LITERAL:
501 /* repeated non-literal */
502 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000503 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000504 while (ptr < end && (SRE_CODE) *ptr != chr)
505 ptr++;
506 break;
507
508 case SRE_OP_NOT_LITERAL_IGNORE:
509 /* repeated non-literal */
510 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000511 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000512 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
513 ptr++;
514 break;
515
516 case SRE_OP_IN:
517 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000518 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
519 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000520 ptr++;
521 break;
522
523 default:
524 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000525 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000526 while ((SRE_CHAR*) state->ptr < end) {
527 i = SRE_MATCH(state, pattern, level);
528 if (i < 0)
529 return i;
530 if (!i)
531 break;
532 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000533 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
534 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000535 return (SRE_CHAR*) state->ptr - ptr;
536 }
537
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000538 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000539 return ptr - (SRE_CHAR*) state->ptr;
540}
541
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if ((pattern[2] & SRE_INFO_PREFIX) && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000569
570LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000571SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000572{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000573 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000574 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000575
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000576 SRE_CHAR* end = state->end;
577 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000578 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000579 SRE_REPEAT* rp;
580 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000581 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000582
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000583 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000584
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000585 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000586
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000587#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000588 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000589 return SRE_ERROR_RECURSION_LIMIT;
590#endif
591
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000592#if defined(USE_RECURSION_LIMIT)
593 if (level > USE_RECURSION_LIMIT)
594 return SRE_ERROR_RECURSION_LIMIT;
595#endif
596
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000597 if (pattern[0] == SRE_OP_INFO) {
598 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000599 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000600 if (pattern[3] && (end - ptr) < pattern[3]) {
601 TRACE(("reject (got %d chars, need %d)\n",
602 (end - ptr), pattern[3]));
603 return 0;
604 }
605 pattern += pattern[1] + 1;
606 }
607
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000608 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000609
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000610 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000611
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000612 case SRE_OP_FAILURE:
613 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000614 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000615 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000616
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000617 case SRE_OP_SUCCESS:
618 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000619 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000620 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000621 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000622
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000623 case SRE_OP_AT:
624 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000625 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000626 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000627 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000628 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000629 pattern++;
630 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000631
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000632 case SRE_OP_CATEGORY:
633 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000634 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000635 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000636 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000637 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000638 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000639 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000640 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000641
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000642 case SRE_OP_LITERAL:
643 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000644 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000645 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000646 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000647 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000648 pattern++;
649 ptr++;
650 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000651
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000652 case SRE_OP_NOT_LITERAL:
653 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000654 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000655 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000656 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000657 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000658 pattern++;
659 ptr++;
660 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000661
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000662 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000663 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000664 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000665 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000666 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
667 return 0;
668 ptr++;
669 break;
670
671 case SRE_OP_ANY_ALL:
672 /* match anything */
673 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000674 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000675 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000676 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000677 ptr++;
678 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000679
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000680 case SRE_OP_IN:
681 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000682 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000683 TRACE(("|%p|%p|IN\n", pattern, ptr));
684 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000685 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000686 pattern += pattern[0];
687 ptr++;
688 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000689
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000690 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000691 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000692 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000693 i = pattern[0];
694 {
695 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
696 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
697 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000698 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000699 while (p < e) {
700 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000701 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000702 p++; ptr++;
703 }
704 }
705 pattern++;
706 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000707
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000708 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000709 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000710 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000711 i = pattern[0];
712 {
713 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
714 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
715 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000716 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000717 while (p < e) {
718 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000719 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000720 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000721 p++; ptr++;
722 }
723 }
724 pattern++;
725 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000728 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000730 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000731 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 pattern++;
733 ptr++;
734 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000735
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000737 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000739 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000740 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 pattern++;
742 ptr++;
743 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000744
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000746 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000747 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000748 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000749 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000750 pattern += pattern[0];
751 ptr++;
752 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000753
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000754 case SRE_OP_MARK:
755 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000756 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000757 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000758 i = pattern[0];
759 if (i & 1)
760 state->lastindex = i/2 + 1;
761 if (i > state->lastmark)
762 state->lastmark = i;
763 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000764 pattern++;
765 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000766
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 case SRE_OP_JUMP:
768 case SRE_OP_INFO:
769 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000770 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000771 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000772 pattern += pattern[0];
773 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000774
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000775 case SRE_OP_ASSERT:
776 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000777 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000778 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000779 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000780 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000781 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000782 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000783 if (i <= 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000784 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000785 pattern += pattern[0];
786 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000787
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000788 case SRE_OP_ASSERT_NOT:
789 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000790 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000791 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000793 if (state->ptr >= state->beginning) {
794 i = SRE_MATCH(state, pattern + 2, level + 1);
795 if (i < 0)
796 return i;
797 if (i)
798 return 0;
799 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 pattern += pattern[0];
801 break;
802
803 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000804 /* alternation */
805 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000806 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000807 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000808 for (; pattern[0]; pattern += pattern[0]) {
809 if (pattern[1] == SRE_OP_LITERAL &&
810 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
811 continue;
812 if (pattern[1] == SRE_OP_IN &&
813 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
814 continue;
815 state->ptr = ptr;
816 i = SRE_MATCH(state, pattern + 1, level + 1);
817 if (i)
818 return i;
819 if (state->lastmark > lastmark) {
820 memset(
821 state->mark + lastmark + 1, 0,
822 (state->lastmark - lastmark) * sizeof(void*)
823 );
824 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000825 }
826 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000827 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000828
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000829 case SRE_OP_REPEAT_ONE:
830 /* match repeated sequence (maximizing regexp) */
831
832 /* this operator only works if the repeated item is
833 exactly one character wide, and we're not already
834 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000835 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000836
837 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
838
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000839 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000840 pattern[1], pattern[2]));
841
Fredrik Lundhe1869832000-08-01 22:47:49 +0000842 if (ptr + pattern[1] > end)
843 return 0; /* cannot match */
844
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000845 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000846
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000847 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
848 if (count < 0)
849 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000850
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000851 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000852
853 /* when we arrive here, count contains the number of
854 matches, and ptr points to the tail of the target
855 string. check if the rest of the pattern matches,
856 and backtrack if not. */
857
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000858 if (count < (int) pattern[1])
859 return 0;
860
861 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
862 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000863 state->ptr = ptr;
864 return 1;
865
866 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
867 /* tail starts with a literal. skip positions where
868 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000869 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000870 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000871 while (count >= (int) pattern[1] &&
872 (ptr >= end || *ptr != chr)) {
873 ptr--;
874 count--;
875 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000876 if (count < (int) pattern[1])
877 break;
878 state->ptr = ptr;
879 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000880 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000881 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000882 ptr--;
883 count--;
884 }
885
886 } else {
887 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000888 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000889 while (count >= (int) pattern[1]) {
890 state->ptr = ptr;
891 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000892 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000893 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000894 ptr--;
895 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000896 if (state->lastmark > lastmark) {
897 memset(
898 state->mark + lastmark + 1, 0,
899 (state->lastmark - lastmark) * sizeof(void*)
900 );
901 state->lastmark = lastmark;
902 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000903 }
904 }
905 return 0;
906
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000907 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000908 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +0000909 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000910 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000911 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000912 pattern[1], pattern[2]));
913
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000914 rep.count = -1;
915 rep.pattern = pattern;
916
917 /* install new repeat context */
918 rep.prev = state->repeat;
919 state->repeat = &rep;
920
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000921 state->ptr = ptr;
922 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000923
924 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000925
926 return i;
927
928 case SRE_OP_MAX_UNTIL:
929 /* maximizing repeat */
930 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
931
932 /* FIXME: we probably need to deal with zero-width
933 matches in here... */
934
935 rp = state->repeat;
936 if (!rp)
937 return SRE_ERROR_STATE;
938
939 state->ptr = ptr;
940
941 count = rp->count + 1;
942
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000943 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000944
945 if (count < rp->pattern[1]) {
946 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000947 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000948 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000949 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000950 if (i)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000951 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000952 rp->count = count - 1;
953 state->ptr = ptr;
954 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000955 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000956
957 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000958 /* we may have enough matches, but if we can
959 match another item, do so */
960 rp->count = count;
961 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000962 i = mark_save(state, 0, lastmark);
963 if (i < 0)
964 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000965 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000966 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000967 if (i)
968 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000969 i = mark_restore(state, 0, lastmark);
970 if (i < 0)
971 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000972 rp->count = count - 1;
973 state->ptr = ptr;
974 }
975
976 /* cannot match more repeated items here. make sure the
977 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000978 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000979 i = SRE_MATCH(state, pattern, level + 1);
980 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000981 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000982 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +0000983 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000984 return 0;
985
986 case SRE_OP_MIN_UNTIL:
987 /* minimizing repeat */
988 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
989
990 rp = state->repeat;
991 if (!rp)
992 return SRE_ERROR_STATE;
993
994 count = rp->count + 1;
995
Fredrik Lundh770617b2001-01-14 15:06:11 +0000996 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
997 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000998
999 state->ptr = ptr;
1000
1001 if (count < rp->pattern[1]) {
1002 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001003 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001004 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001005 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001006 if (i)
1007 return i;
1008 rp->count = count-1;
1009 state->ptr = ptr;
1010 return 0;
1011 }
1012
1013 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001014 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001015 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001016 if (i) {
1017 /* free(rp); */
1018 return i;
1019 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00001020 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001021 state->repeat = rp;
1022
1023 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1024 return 0;
1025
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001026 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001027 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001028 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001029 if (i)
1030 return i;
1031 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001032 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001033 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001034
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001035 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001036 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001037 return SRE_ERROR_ILLEGAL;
1038 }
1039 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001040
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001041 /* shouldn't end up here */
1042 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001043}
1044
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001045LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001046SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1047{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001048 SRE_CHAR* ptr = state->start;
1049 SRE_CHAR* end = state->end;
1050 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001051 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001052 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001053 SRE_CODE* prefix = NULL;
1054 SRE_CODE* charset = NULL;
1055 SRE_CODE* overlap = NULL;
1056 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001057
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001058 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001059 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001060 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001061
1062 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001063
1064 if (pattern[3] > 0) {
1065 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001066 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001067 end -= pattern[3]-1;
1068 if (end <= ptr)
1069 end = ptr+1;
1070 }
1071
Fredrik Lundh3562f112000-07-02 12:00:07 +00001072 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001073 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001074 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001075 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001076 prefix_skip = pattern[6];
1077 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001078 overlap = prefix + prefix_len - 1;
1079 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001080 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001081 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001082 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001083
1084 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001085 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001086
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001087 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1088 TRACE(("charset = %p\n", charset));
1089
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001090#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001091 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001092 /* pattern starts with a known prefix. use the overlap
1093 table to skip forward as fast as we possibly can */
1094 int i = 0;
1095 end = state->end;
1096 while (ptr < end) {
1097 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001098 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001099 if (!i)
1100 break;
1101 else
1102 i = overlap[i];
1103 } else {
1104 if (++i == prefix_len) {
1105 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001106 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1107 state->start = ptr + 1 - prefix_len;
1108 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001109 if (flags & SRE_INFO_LITERAL)
1110 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001111 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001112 if (status != 0)
1113 return status;
1114 /* close but no cigar -- try again */
1115 i = overlap[i];
1116 }
1117 break;
1118 }
1119
1120 }
1121 ptr++;
1122 }
1123 return 0;
1124 }
1125#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001126
Fredrik Lundh3562f112000-07-02 12:00:07 +00001127 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001128 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001129 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001130 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001131 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001132 for (;;) {
1133 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1134 ptr++;
1135 if (ptr == end)
1136 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001137 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001138 state->start = ptr;
1139 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001140 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001141 if (status != 0)
1142 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001143 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001144 } else if (charset) {
1145 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001146 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001147 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001148 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001149 ptr++;
1150 if (ptr == end)
1151 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001152 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001153 state->start = ptr;
1154 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001155 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001156 if (status != 0)
1157 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001158 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001159 }
1160 } else
1161 /* general case */
1162 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001163 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001164 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001165 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001166 if (status != 0)
1167 break;
1168 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001169
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001170 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001171}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001172
Guido van Rossumb700df92000-03-31 14:59:30 +00001173
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001174#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001175
1176/* -------------------------------------------------------------------- */
1177/* factories and destructors */
1178
1179/* see sre.h for object declarations */
1180
1181staticforward PyTypeObject Pattern_Type;
1182staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001183staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001184
1185static PyObject *
1186_compile(PyObject* self_, PyObject* args)
1187{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001188 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001189
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001190 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001191 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001192
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001193 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001194 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001195 PyObject* code;
1196 int groups = 0;
1197 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001198 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001199 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001200 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001201 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001202
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001203 code = PySequence_Fast(code, "code argument must be a sequence");
1204 if (!code)
1205 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001206
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001207#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001208 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001209#else
1210 n = PySequence_Length(code);
1211#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001212
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001213 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001214 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001215 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001216 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001217 }
1218
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001219 for (i = 0; i < n; i++) {
1220 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001221 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001222 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001223
1224 Py_DECREF(code);
1225
1226 if (PyErr_Occurred())
1227 return NULL;
1228
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001229 Py_INCREF(pattern);
1230 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001232 self->flags = flags;
1233
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001234 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001235
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001236 Py_XINCREF(groupindex);
1237 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001238
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001239 Py_XINCREF(indexgroup);
1240 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001241
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001243}
1244
1245static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001246sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001247{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001248 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001249}
1250
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001251static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001252sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001253{
1254 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001255 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001256 return NULL;
1257 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001258 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001259#if defined(HAVE_UNICODE)
1260 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001261 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001262#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001263 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001264}
1265
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001266LOCAL(void)
1267state_reset(SRE_STATE* state)
1268{
1269 int i;
1270
1271 state->lastmark = 0;
1272
1273 /* FIXME: dynamic! */
1274 for (i = 0; i < SRE_MARK_SIZE; i++)
1275 state->mark[i] = NULL;
1276
1277 state->lastindex = -1;
1278
1279 state->repeat = NULL;
1280
1281 mark_fini(state);
1282}
1283
Guido van Rossumb700df92000-03-31 14:59:30 +00001284LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001285state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1286 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001287{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001288 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001289
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001290 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001291 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001292 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001293
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001294 memset(state, 0, sizeof(SRE_STATE));
1295
1296 state->lastindex = -1;
1297
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001298#if defined(HAVE_UNICODE)
1299 if (PyUnicode_Check(string)) {
1300 /* unicode strings doesn't always support the buffer interface */
1301 ptr = (void*) PyUnicode_AS_DATA(string);
1302 bytes = PyUnicode_GET_DATA_SIZE(string);
1303 size = PyUnicode_GET_SIZE(string);
1304 state->charsize = sizeof(Py_UNICODE);
1305
1306 } else {
1307#endif
1308
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001309 /* get pointer to string buffer */
1310 buffer = string->ob_type->tp_as_buffer;
1311 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1312 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001313 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001314 return NULL;
1315 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001316
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001317 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001318 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1319 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001320 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1321 return NULL;
1322 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001323
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001324 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001325#if PY_VERSION_HEX >= 0x01060000
1326 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001327#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001328 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001329#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001330
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001331 if (PyString_Check(string) || bytes == size)
1332 state->charsize = 1;
1333#if defined(HAVE_UNICODE)
1334 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1335 state->charsize = sizeof(Py_UNICODE);
1336#endif
1337 else {
1338 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1339 return NULL;
1340 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001341
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001342#if defined(HAVE_UNICODE)
1343 }
1344#endif
1345
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001346 /* adjust boundaries */
1347 if (start < 0)
1348 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001349 else if (start > size)
1350 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001352 if (end < 0)
1353 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001354 else if (end > size)
1355 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001358
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001359 state->start = (void*) ((char*) ptr + start * state->charsize);
1360 state->end = (void*) ((char*) ptr + end * state->charsize);
1361
1362 Py_INCREF(string);
1363 state->string = string;
1364 state->pos = start;
1365 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001366
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001367 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001368 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001369#if defined(HAVE_UNICODE)
1370 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001371 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001372#endif
1373 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001374 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001375
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001376 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001377}
1378
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001379LOCAL(void)
1380state_fini(SRE_STATE* state)
1381{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001382 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001383 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001384}
1385
1386LOCAL(PyObject*)
1387state_getslice(SRE_STATE* state, int index, PyObject* string)
1388{
Fredrik Lundh58100642000-08-09 09:14:35 +00001389 int i, j;
1390
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001391 index = (index - 1) * 2;
1392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001393 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001394 i = j = 0;
1395 } else {
1396 i = ((char*)state->mark[index] - (char*)state->beginning) /
1397 state->charsize;
1398 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1399 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001400 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001401
Fredrik Lundh58100642000-08-09 09:14:35 +00001402 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001403}
1404
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001405static void
1406pattern_error(int status)
1407{
1408 switch (status) {
1409 case SRE_ERROR_RECURSION_LIMIT:
1410 PyErr_SetString(
1411 PyExc_RuntimeError,
1412 "maximum recursion limit exceeded"
1413 );
1414 break;
1415 case SRE_ERROR_MEMORY:
1416 PyErr_NoMemory();
1417 break;
1418 default:
1419 /* other error codes indicate compiler/engine bugs */
1420 PyErr_SetString(
1421 PyExc_RuntimeError,
1422 "internal error in regular expression engine"
1423 );
1424 }
1425}
1426
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001427static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001428pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001429{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001430 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001432 MatchObject* match;
1433 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001434 char* base;
1435 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001436
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001437 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001438
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001439 /* create match object (with room for extra group marks) */
1440 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001441 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001442 if (!match)
1443 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001444
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001445 Py_INCREF(pattern);
1446 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001447
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001448 Py_INCREF(state->string);
1449 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001450
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001451 match->regs = NULL;
1452 match->groups = pattern->groups+1;
1453
1454 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001455
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001456 base = (char*) state->beginning;
1457 n = state->charsize;
1458
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001459 match->mark[0] = ((char*) state->start - base) / n;
1460 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001461
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001462 for (i = j = 0; i < pattern->groups; i++, j+=2)
1463 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1464 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1465 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1466 } else
1467 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1468
1469 match->pos = state->pos;
1470 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001471
Fredrik Lundh6f013982000-07-03 18:44:21 +00001472 match->lastindex = state->lastindex;
1473
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001474 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001475
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001476 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001477
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001478 /* no match */
1479 Py_INCREF(Py_None);
1480 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001481
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001482 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001483
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001484 /* internal error */
1485 pattern_error(status);
1486 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001487}
1488
1489static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001490pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001491{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001492 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001493
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001494 ScannerObject* self;
1495
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001496 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001497 int start = 0;
1498 int end = INT_MAX;
1499 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1500 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001501
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001502 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001503 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001504 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001505 return NULL;
1506
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001507 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001508 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001509 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001510 return NULL;
1511 }
1512
1513 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001514 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001515
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001516 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001517}
1518
Guido van Rossumb700df92000-03-31 14:59:30 +00001519static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001520pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001521{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001522 Py_XDECREF(self->pattern);
1523 Py_XDECREF(self->groupindex);
1524 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001525}
1526
1527static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001528pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001529{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001530 SRE_STATE state;
1531 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001532
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001533 PyObject* string;
1534 int start = 0;
1535 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001536 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1537 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1538 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001539 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001540
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001541 string = state_init(&state, self, string, start, end);
1542 if (!string)
1543 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001544
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001545 state.ptr = state.start;
1546
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001547 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1548
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001549 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001550 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001551 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001552#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001553 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001554#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001555 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001556
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001557 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1558
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001559 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001560
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001561 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001562}
1563
1564static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001565pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001566{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001567 SRE_STATE state;
1568 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001569
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001570 PyObject* string;
1571 int start = 0;
1572 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001573 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1574 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1575 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001576 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001577
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001578 string = state_init(&state, self, string, start, end);
1579 if (!string)
1580 return NULL;
1581
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001582 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1583
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001584 if (state.charsize == 1) {
1585 status = sre_search(&state, PatternObject_GetCode(self));
1586 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001587#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001588 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001589#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001590 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001591
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001592 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1593
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001594 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001595
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001596 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001597}
1598
1599static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001600call(char* function, PyObject* args)
1601{
1602 PyObject* name;
1603 PyObject* module;
1604 PyObject* func;
1605 PyObject* result;
1606
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001607 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001608 if (!name)
1609 return NULL;
1610 module = PyImport_Import(name);
1611 Py_DECREF(name);
1612 if (!module)
1613 return NULL;
1614 func = PyObject_GetAttrString(module, function);
1615 Py_DECREF(module);
1616 if (!func)
1617 return NULL;
1618 result = PyObject_CallObject(func, args);
1619 Py_DECREF(func);
1620 Py_DECREF(args);
1621 return result;
1622}
1623
1624static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001625pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001626{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001627 PyObject* template;
1628 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001629 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001630 static char* kwlist[] = { "repl", "string", "count", NULL };
1631 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1632 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001633 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001634
1635 /* delegate to Python code */
1636 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1637}
1638
1639static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001640pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001641{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001642 PyObject* template;
1643 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001644 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001645 static char* kwlist[] = { "repl", "string", "count", NULL };
1646 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1647 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001648 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001649
1650 /* delegate to Python code */
1651 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1652}
1653
1654static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001655pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001656{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001657 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001658 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001659 static char* kwlist[] = { "source", "maxsplit", NULL };
1660 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1661 &string, &maxsplit))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001662 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001663
1664 /* delegate to Python code */
1665 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1666}
1667
1668static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001669pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001670{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001671 SRE_STATE state;
1672 PyObject* list;
1673 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001674 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001676 PyObject* string;
1677 int start = 0;
1678 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001679 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1680 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1681 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001682 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001683
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001684 string = state_init(&state, self, string, start, end);
1685 if (!string)
1686 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001687
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001688 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001689
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001690 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001691
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001692 PyObject* item;
1693
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001694 state_reset(&state);
1695
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001696 state.ptr = state.start;
1697
1698 if (state.charsize == 1) {
1699 status = sre_search(&state, PatternObject_GetCode(self));
1700 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001701#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001703#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001704 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001705
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001706 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001707
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001708 /* don't bother to build a match object */
1709 switch (self->groups) {
1710 case 0:
1711 item = PySequence_GetSlice(
1712 string,
1713 ((char*) state.start - (char*) state.beginning) /
1714 state.charsize,
1715 ((char*) state.ptr - (char*) state.beginning) /
1716 state.charsize);
1717 if (!item)
1718 goto error;
1719 break;
1720 case 1:
1721 item = state_getslice(&state, 1, string);
1722 if (!item)
1723 goto error;
1724 break;
1725 default:
1726 item = PyTuple_New(self->groups);
1727 if (!item)
1728 goto error;
1729 for (i = 0; i < self->groups; i++) {
1730 PyObject* o = state_getslice(&state, i+1, string);
1731 if (!o) {
1732 Py_DECREF(item);
1733 goto error;
1734 }
1735 PyTuple_SET_ITEM(item, i, o);
1736 }
1737 break;
1738 }
1739
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001740 status = PyList_Append(list, item);
1741 Py_DECREF(item);
1742
1743 if (status < 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001744 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001745
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001746 if (state.ptr == state.start)
1747 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001748 else
1749 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001750
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001751 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001752
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001753 if (status == 0)
1754 break;
1755
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001756 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001757 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001759 }
1760 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001761
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001762 state_fini(&state);
1763 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001764
1765error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001766 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001767 state_fini(&state);
1768 return NULL;
1769
Guido van Rossumb700df92000-03-31 14:59:30 +00001770}
1771
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001772static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00001773 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1774 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1775 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1776 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1777 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1778 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001779 /* experimental */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001780 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001781 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001782};
1783
1784static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001785pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001786{
1787 PyObject* res;
1788
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001789 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001790
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001791 if (res)
1792 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001793
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001794 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001795
1796 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001797 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001798 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001799 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001800 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001801
1802 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001803 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001804
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001805 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001806 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001807
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001808 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001809 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001810 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001811 }
1812
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001813 PyErr_SetString(PyExc_AttributeError, name);
1814 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001815}
1816
1817statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001818 PyObject_HEAD_INIT(NULL)
1819 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001820 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001821 (destructor)pattern_dealloc, /*tp_dealloc*/
1822 0, /*tp_print*/
1823 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001824};
1825
1826/* -------------------------------------------------------------------- */
1827/* match methods */
1828
1829static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001830match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001831{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001832 Py_XDECREF(self->regs);
1833 Py_XDECREF(self->string);
1834 Py_DECREF(self->pattern);
1835 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001836}
1837
1838static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001839match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001840{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001841 if (index < 0 || index >= self->groups) {
1842 /* raise IndexError if we were given a bad group number */
1843 PyErr_SetString(
1844 PyExc_IndexError,
1845 "no such group"
1846 );
1847 return NULL;
1848 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001849
Fredrik Lundh6f013982000-07-03 18:44:21 +00001850 index *= 2;
1851
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001852 if (self->string == Py_None || self->mark[index] < 0) {
1853 /* return default value if the string or group is undefined */
1854 Py_INCREF(def);
1855 return def;
1856 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001857
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001858 return PySequence_GetSlice(
1859 self->string, self->mark[index], self->mark[index+1]
1860 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001861}
1862
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001863static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001864match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001865{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001866 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001867
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001868 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001869 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001870
Fredrik Lundh6f013982000-07-03 18:44:21 +00001871 i = -1;
1872
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001873 if (self->pattern->groupindex) {
1874 index = PyObject_GetItem(self->pattern->groupindex, index);
1875 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001876 if (PyInt_Check(index))
1877 i = (int) PyInt_AS_LONG(index);
1878 Py_DECREF(index);
1879 } else
1880 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001881 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001882
1883 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001884}
1885
1886static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001887match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001888{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001889 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001890}
1891
1892static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001893match_expand(MatchObject* self, PyObject* args)
1894{
1895 PyObject* template;
1896 if (!PyArg_ParseTuple(args, "O:expand", &template))
1897 return NULL;
1898
1899 /* delegate to Python code */
1900 return call(
1901 "_expand",
1902 Py_BuildValue("OOO", self->pattern, self, template)
1903 );
1904}
1905
1906static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001907match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001908{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001909 PyObject* result;
1910 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001911
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001912 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001913
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001914 switch (size) {
1915 case 0:
1916 result = match_getslice(self, Py_False, Py_None);
1917 break;
1918 case 1:
1919 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1920 break;
1921 default:
1922 /* fetch multiple items */
1923 result = PyTuple_New(size);
1924 if (!result)
1925 return NULL;
1926 for (i = 0; i < size; i++) {
1927 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001928 self, PyTuple_GET_ITEM(args, i), Py_None
1929 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001930 if (!item) {
1931 Py_DECREF(result);
1932 return NULL;
1933 }
1934 PyTuple_SET_ITEM(result, i, item);
1935 }
1936 break;
1937 }
1938 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001939}
1940
1941static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001942match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001943{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001944 PyObject* result;
1945 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001946
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001947 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001948 static char* kwlist[] = { "default", NULL };
1949 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001950 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001951
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001952 result = PyTuple_New(self->groups-1);
1953 if (!result)
1954 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001955
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001956 for (index = 1; index < self->groups; index++) {
1957 PyObject* item;
1958 item = match_getslice_by_index(self, index, def);
1959 if (!item) {
1960 Py_DECREF(result);
1961 return NULL;
1962 }
1963 PyTuple_SET_ITEM(result, index-1, item);
1964 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001965
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001966 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001967}
1968
1969static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001970match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001971{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001972 PyObject* result;
1973 PyObject* keys;
1974 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001975
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001976 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001977 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00001978 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001979 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001980
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001981 result = PyDict_New();
1982 if (!result || !self->pattern->groupindex)
1983 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001984
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001985 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00001986 if (!keys)
1987 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00001988
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001989 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00001990 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001991 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001992 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001993 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00001994 if (!key)
1995 goto failed;
1996 value = match_getslice(self, key, def);
1997 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001998 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00001999 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002000 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002001 status = PyDict_SetItem(result, key, value);
2002 Py_DECREF(value);
2003 if (status < 0)
2004 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002005 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002006
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002007 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002008
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002009 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002010
2011failed:
2012 Py_DECREF(keys);
2013 Py_DECREF(result);
2014 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002015}
2016
2017static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002018match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002019{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002020 int index;
2021
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002022 PyObject* index_ = Py_False; /* zero */
2023 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2024 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002025
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002026 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002027
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002028 if (index < 0 || index >= self->groups) {
2029 PyErr_SetString(
2030 PyExc_IndexError,
2031 "no such group"
2032 );
2033 return NULL;
2034 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002035
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002036 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002037 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002038}
2039
2040static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002041match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002042{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002043 int index;
2044
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 PyObject* index_ = Py_False; /* zero */
2046 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2047 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002048
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002049 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002050
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002051 if (index < 0 || index >= self->groups) {
2052 PyErr_SetString(
2053 PyExc_IndexError,
2054 "no such group"
2055 );
2056 return NULL;
2057 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002058
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002059 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 return Py_BuildValue("i", self->mark[index*2+1]);
2061}
2062
2063LOCAL(PyObject*)
2064_pair(int i1, int i2)
2065{
2066 PyObject* pair;
2067 PyObject* item;
2068
2069 pair = PyTuple_New(2);
2070 if (!pair)
2071 return NULL;
2072
2073 item = PyInt_FromLong(i1);
2074 if (!item)
2075 goto error;
2076 PyTuple_SET_ITEM(pair, 0, item);
2077
2078 item = PyInt_FromLong(i2);
2079 if (!item)
2080 goto error;
2081 PyTuple_SET_ITEM(pair, 1, item);
2082
2083 return pair;
2084
2085 error:
2086 Py_DECREF(pair);
2087 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002088}
2089
2090static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002091match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002092{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002093 int index;
2094
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 PyObject* index_ = Py_False; /* zero */
2096 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2097 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002098
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002099 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002100
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002101 if (index < 0 || index >= self->groups) {
2102 PyErr_SetString(
2103 PyExc_IndexError,
2104 "no such group"
2105 );
2106 return NULL;
2107 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002108
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002109 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002110 return _pair(self->mark[index*2], self->mark[index*2+1]);
2111}
2112
2113static PyObject*
2114match_regs(MatchObject* self)
2115{
2116 PyObject* regs;
2117 PyObject* item;
2118 int index;
2119
2120 regs = PyTuple_New(self->groups);
2121 if (!regs)
2122 return NULL;
2123
2124 for (index = 0; index < self->groups; index++) {
2125 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2126 if (!item) {
2127 Py_DECREF(regs);
2128 return NULL;
2129 }
2130 PyTuple_SET_ITEM(regs, index, item);
2131 }
2132
2133 Py_INCREF(regs);
2134 self->regs = regs;
2135
2136 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002137}
2138
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002139static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002140 {"group", (PyCFunction) match_group, METH_VARARGS},
2141 {"start", (PyCFunction) match_start, METH_VARARGS},
2142 {"end", (PyCFunction) match_end, METH_VARARGS},
2143 {"span", (PyCFunction) match_span, METH_VARARGS},
2144 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2145 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2146 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002148};
2149
2150static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002151match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002152{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002154
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002155 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2156 if (res)
2157 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002159 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002160
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002161 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002162 if (self->lastindex >= 0)
2163 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002164 Py_INCREF(Py_None);
2165 return Py_None;
2166 }
2167
2168 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002169 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002170 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002171 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002172 );
2173 if (result)
2174 return result;
2175 PyErr_Clear();
2176 }
2177 Py_INCREF(Py_None);
2178 return Py_None;
2179 }
2180
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002181 if (!strcmp(name, "string")) {
2182 if (self->string) {
2183 Py_INCREF(self->string);
2184 return self->string;
2185 } else {
2186 Py_INCREF(Py_None);
2187 return Py_None;
2188 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002189 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002190
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002191 if (!strcmp(name, "regs")) {
2192 if (self->regs) {
2193 Py_INCREF(self->regs);
2194 return self->regs;
2195 } else
2196 return match_regs(self);
2197 }
2198
2199 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002200 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002201 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002202 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002203
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002204 if (!strcmp(name, "pos"))
2205 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002206
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 if (!strcmp(name, "endpos"))
2208 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002209
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002210 PyErr_SetString(PyExc_AttributeError, name);
2211 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002212}
2213
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2216
2217statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002218 PyObject_HEAD_INIT(NULL)
2219 0, "SRE_Match",
2220 sizeof(MatchObject), sizeof(int),
2221 (destructor)match_dealloc, /*tp_dealloc*/
2222 0, /*tp_print*/
2223 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002224};
2225
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002226/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002227/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002228
2229static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002230scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002231{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002232 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002233 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002234 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002235}
2236
2237static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002238scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002239{
2240 SRE_STATE* state = &self->state;
2241 PyObject* match;
2242 int status;
2243
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002244 state_reset(state);
2245
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002246 state->ptr = state->start;
2247
2248 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002249 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002250 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002251#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002252 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002253#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002254 }
2255
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002256 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002257 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002258
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002259 if (status == 0 || state->ptr == state->start)
2260 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002261 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002262 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002263
2264 return match;
2265}
2266
2267
2268static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002269scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002270{
2271 SRE_STATE* state = &self->state;
2272 PyObject* match;
2273 int status;
2274
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002275 state_reset(state);
2276
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002277 state->ptr = state->start;
2278
2279 if (state->charsize == 1) {
2280 status = sre_search(state, PatternObject_GetCode(self->pattern));
2281 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002282#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002283 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002284#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002285 }
2286
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002287 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002288 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002289
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002290 if (status == 0 || state->ptr == state->start)
2291 state->start = (void*) ((char*) state->ptr + state->charsize);
2292 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002293 state->start = state->ptr;
2294
2295 return match;
2296}
2297
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002298static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002299 {"match", (PyCFunction) scanner_match, 0},
2300 {"search", (PyCFunction) scanner_search, 0},
2301 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002302};
2303
2304static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002305scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002306{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002307 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002308
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002309 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2310 if (res)
2311 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002312
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002313 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002314
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002315 /* attributes */
2316 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002317 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002318 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002319 }
2320
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002321 PyErr_SetString(PyExc_AttributeError, name);
2322 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002323}
2324
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002325statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002326 PyObject_HEAD_INIT(NULL)
2327 0, "SRE_Scanner",
2328 sizeof(ScannerObject), 0,
2329 (destructor)scanner_dealloc, /*tp_dealloc*/
2330 0, /*tp_print*/
2331 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002332};
2333
Guido van Rossumb700df92000-03-31 14:59:30 +00002334static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002335 {"compile", _compile, 1},
2336 {"getcodesize", sre_codesize, 1},
2337 {"getlower", sre_getlower, 1},
2338 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002339};
2340
2341void
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002342#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002343__declspec(dllexport)
2344#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002345init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002346{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002347 /* Patch object types */
2348 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002349 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002350
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002351 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002352}
2353
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002354#endif /* !defined(SRE_RECURSIVE) */