blob: efb704bdf573b817da0cf0e77b037dc85c612116 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-06-30 fl added fast search optimization
 * 2000-06-30 fl added assert (lookahead) primitives, etc
 * 2000-07-02 fl added charset optimizations, etc
 * 2000-07-03 fl store code in pattern object, lookbehind, etc
 * 2000-07-08 fl added regs attribute
 * 2000-07-21 fl reset lastindex in scanner methods
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-03 fl added recursion limit
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-08-08 fl changed findall to return empty strings instead of None
 * 2000-08-27 fl properly propagate memory errors
 * 2000-09-02 fl return -1 instead of None for start/end/span
 * 2000-09-20 fl added expand method
 * 2000-09-21 fl don't use the buffer interface for unicode strings
 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl really fixed assert_not; reset groups in findall
 * 2000-12-21 fl fixed memory leak in groupdict
 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl don't use recursion for unbounded MIN_UNTIL
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
39
40#ifndef SRE_RECURSIVE
41
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +000042char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000043
44#include "Python.h"
45
46#include "sre.h"
47
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000048#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000049
Fredrik Lundh436c3d582000-06-29 08:58:44 +000050/* name of this module, minus the leading underscore */
51#define MODULE "sre"
52
Guido van Rossumb700df92000-03-31 14:59:30 +000053/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000054#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000055
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000057/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000058#define HAVE_UNICODE
59#endif
60
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000061/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000062/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
Fredrik Lundh33accc12000-08-27 20:59:47 +000064/* prevent run-away recursion (bad patterns on long strings) */
65
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000066#if !defined(USE_STACKCHECK)
Fredrik Lundh33accc12000-08-27 20:59:47 +000067#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
68/* require smaller recursion limit for a number of 64-bit platforms:
69 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
70/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
71#define USE_RECURSION_LIMIT 7500
72#else
73#define USE_RECURSION_LIMIT 10000
74#endif
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000075#endif
Fredrik Lundh96ab4652000-08-03 16:29:50 +000076
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000077/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000078#define USE_FAST_SEARCH
79
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000081#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000082
83/* -------------------------------------------------------------------- */
84
Fredrik Lundh80946112000-06-29 18:03:25 +000085#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000086#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000087#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000088/* fastest possible local call under MSVC */
89#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000090#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000091#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000092#else
93#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000094#endif
95
96/* error codes */
97#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000098#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000099#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +0000100#define SRE_ERROR_MEMORY -9 /* out of memory */
101
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000102#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000103#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000104#else
105#define TRACE(v)
106#endif
107
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000108/* -------------------------------------------------------------------- */
109/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000110
/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* property bits for character codes 0..127; each entry is the OR of
   the SRE_*_MASK values that apply to that code (16 entries per row) */
static const char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lower-case mapping for character codes 0..127: codes 65..90 map to
   97..122, every other code maps to itself (16 entries per row) */
static const char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* map ch to lower case using the table above.  codes >= 128 are
   returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? (unsigned int) sre_char_lower[ch] : ch);
}

/* table-driven predicates.  each one yields the masked table entry
   (non-zero if the property holds) for codes below 128, and 0 for
   everything else.  note that the argument is evaluated more than
   once, and is used as an array index, so it must not be negative */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000152
/* locale-specific character predicates */

/* map ch to lower case using the C library's tolower().  codes >= 256
   are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int) tolower((int) (ch)) : ch);
}

/* predicates built on the <ctype.h> classification functions.  codes
   >= 256 are never passed to the C library; they simply yield 0.  the
   argument is evaluated more than once */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
164
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)
/* map ch to lower case via Py_UNICODE_TOLOWER.  the value is narrowed
   to Py_UNICODE before the lookup */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

/* predicates that delegate to the Py_UNICODE_* classification macros;
   the "word" predicate additionally accepts the underscore */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
178
Guido van Rossumb700df92000-03-31 14:59:30 +0000179LOCAL(int)
180sre_category(SRE_CODE category, unsigned int ch)
181{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000183
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000184 case SRE_CATEGORY_DIGIT:
185 return SRE_IS_DIGIT(ch);
186 case SRE_CATEGORY_NOT_DIGIT:
187 return !SRE_IS_DIGIT(ch);
188 case SRE_CATEGORY_SPACE:
189 return SRE_IS_SPACE(ch);
190 case SRE_CATEGORY_NOT_SPACE:
191 return !SRE_IS_SPACE(ch);
192 case SRE_CATEGORY_WORD:
193 return SRE_IS_WORD(ch);
194 case SRE_CATEGORY_NOT_WORD:
195 return !SRE_IS_WORD(ch);
196 case SRE_CATEGORY_LINEBREAK:
197 return SRE_IS_LINEBREAK(ch);
198 case SRE_CATEGORY_NOT_LINEBREAK:
199 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000200
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000201 case SRE_CATEGORY_LOC_WORD:
202 return SRE_LOC_IS_WORD(ch);
203 case SRE_CATEGORY_LOC_NOT_WORD:
204 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000205
206#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000207 case SRE_CATEGORY_UNI_DIGIT:
208 return SRE_UNI_IS_DIGIT(ch);
209 case SRE_CATEGORY_UNI_NOT_DIGIT:
210 return !SRE_UNI_IS_DIGIT(ch);
211 case SRE_CATEGORY_UNI_SPACE:
212 return SRE_UNI_IS_SPACE(ch);
213 case SRE_CATEGORY_UNI_NOT_SPACE:
214 return !SRE_UNI_IS_SPACE(ch);
215 case SRE_CATEGORY_UNI_WORD:
216 return SRE_UNI_IS_WORD(ch);
217 case SRE_CATEGORY_UNI_NOT_WORD:
218 return !SRE_UNI_IS_WORD(ch);
219 case SRE_CATEGORY_UNI_LINEBREAK:
220 return SRE_UNI_IS_LINEBREAK(ch);
221 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
222 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000223#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000224 }
225 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000226}
227
228/* helpers */
229
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000230static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000231mark_fini(SRE_STATE* state)
232{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000233 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000234 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000235 state->mark_stack = NULL;
236 }
237 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000238}
239
240static int
241mark_save(SRE_STATE* state, int lo, int hi)
242{
243 void* stack;
244 int size;
245 int minsize, newsize;
246
247 if (hi <= lo)
248 return 0;
249
250 size = (hi - lo) + 1;
251
252 newsize = state->mark_stack_size;
253 minsize = state->mark_stack_base + size;
254
255 if (newsize < minsize) {
256 /* create new stack */
257 if (!newsize) {
258 newsize = 512;
259 if (newsize < minsize)
260 newsize = minsize;
261 TRACE(("allocate stack %d\n", newsize));
262 stack = malloc(sizeof(void*) * newsize);
263 } else {
264 /* grow the stack */
265 while (newsize < minsize)
266 newsize += newsize;
267 TRACE(("grow stack to %d\n", newsize));
268 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
269 }
270 if (!stack) {
271 mark_fini(state);
272 return SRE_ERROR_MEMORY;
273 }
274 state->mark_stack = stack;
275 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000276 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000277
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000278 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000279
280 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
281 size * sizeof(void*));
282
283 state->mark_stack_base += size;
284
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000285 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000286}
287
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000288static int
289mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000290{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000291 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000292
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000293 if (hi <= lo)
294 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000295
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000296 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000297
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000298 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000299
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000300 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000301
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000302 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
303 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000304
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000305 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000306}
307
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000308/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000309
310#define SRE_CHAR unsigned char
311#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000312#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000313#define SRE_CHARSET sre_charset
314#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000315#define SRE_MATCH sre_match
316#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000317
318#if defined(HAVE_UNICODE)
319
Guido van Rossumb700df92000-03-31 14:59:30 +0000320#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000321#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000322#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000323
Guido van Rossumb700df92000-03-31 14:59:30 +0000324#undef SRE_SEARCH
325#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000326#undef SRE_INFO
327#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000328#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000329#undef SRE_AT
330#undef SRE_CHAR
331
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000332/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000333
334#define SRE_CHAR Py_UNICODE
335#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000336#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000337#define SRE_CHARSET sre_ucharset
338#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000339#define SRE_MATCH sre_umatch
340#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000341#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000342
343#endif /* SRE_RECURSIVE */
344
345/* -------------------------------------------------------------------- */
346/* String matching engine */
347
348/* the following section is compiled twice, with different character
349 settings */
350
351LOCAL(int)
352SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
353{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000354 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000355
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000356 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000357
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000358 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000359
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000360 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000361 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000362 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000363
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000364 case SRE_AT_BEGINNING_LINE:
365 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000366 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000367
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000368 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000369 return (((void*) (ptr+1) == state->end &&
370 SRE_IS_LINEBREAK((int) ptr[0])) ||
371 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000372
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000373 case SRE_AT_END_LINE:
374 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000375 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000376
Fredrik Lundh770617b2001-01-14 15:06:11 +0000377 case SRE_AT_END_STRING:
378 return ((void*) ptr == state->end);
379
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000380 case SRE_AT_BOUNDARY:
381 if (state->beginning == state->end)
382 return 0;
383 that = ((void*) ptr > state->beginning) ?
384 SRE_IS_WORD((int) ptr[-1]) : 0;
385 this = ((void*) ptr < state->end) ?
386 SRE_IS_WORD((int) ptr[0]) : 0;
387 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000389 case SRE_AT_NON_BOUNDARY:
390 if (state->beginning == state->end)
391 return 0;
392 that = ((void*) ptr > state->beginning) ?
393 SRE_IS_WORD((int) ptr[-1]) : 0;
394 this = ((void*) ptr < state->end) ?
395 SRE_IS_WORD((int) ptr[0]) : 0;
396 return this == that;
397 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000400}
401
402LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000403SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000404{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000405 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 for (;;) {
410 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000412 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000413 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000414 if (ch == set[0])
415 return ok;
416 set++;
417 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000418
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000419 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000420 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000421 if (set[0] <= ch && ch <= set[1])
422 return ok;
423 set += 2;
424 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000425
Fredrik Lundh3562f112000-07-02 12:00:07 +0000426 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000427 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000428 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
429 return ok;
430 set += 16;
431 break;
432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000434 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000435 if (sre_category(set[0], (int) ch))
436 return ok;
437 set += 1;
438 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000439
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000440 case SRE_OP_NEGATE:
441 ok = !ok;
442 break;
443
444 case SRE_OP_FAILURE:
445 return !ok;
446
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000447 default:
448 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000449 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000450 return 0;
451 }
452 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000453}
454
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000455LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
456
457LOCAL(int)
458SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
459{
460 SRE_CODE chr;
461 SRE_CHAR* ptr = state->ptr;
462 SRE_CHAR* end = state->end;
463 int i;
464
465 /* adjust end */
466 if (maxcount < end - ptr && maxcount != 65535)
467 end = ptr + maxcount;
468
469 switch (pattern[0]) {
470
471 case SRE_OP_ANY:
472 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000473 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000474 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
475 ptr++;
476 break;
477
478 case SRE_OP_ANY_ALL:
479 /* repeated dot wildcare. skip to the end of the target
480 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000481 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000482 ptr = end;
483 break;
484
485 case SRE_OP_LITERAL:
486 /* repeated literal */
487 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000488 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000489 while (ptr < end && (SRE_CODE) *ptr == chr)
490 ptr++;
491 break;
492
493 case SRE_OP_LITERAL_IGNORE:
494 /* repeated literal */
495 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000496 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
498 ptr++;
499 break;
500
501 case SRE_OP_NOT_LITERAL:
502 /* repeated non-literal */
503 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000504 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000505 while (ptr < end && (SRE_CODE) *ptr != chr)
506 ptr++;
507 break;
508
509 case SRE_OP_NOT_LITERAL_IGNORE:
510 /* repeated non-literal */
511 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000512 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000513 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
514 ptr++;
515 break;
516
517 case SRE_OP_IN:
518 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000519 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
520 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000521 ptr++;
522 break;
523
524 default:
525 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000526 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000527 while ((SRE_CHAR*) state->ptr < end) {
528 i = SRE_MATCH(state, pattern, level);
529 if (i < 0)
530 return i;
531 if (!i)
532 break;
533 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000534 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
535 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000536 return (SRE_CHAR*) state->ptr - ptr;
537 }
538
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000539 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000540 return ptr - (SRE_CHAR*) state->ptr;
541}
542
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        /* pattern[5] is the prefix length; the prefix characters
           start at pattern[7] */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000570
571LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000572SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000573{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000574 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000576
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000577 SRE_CHAR* end = state->end;
578 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000579 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000580 SRE_REPEAT* rp;
581 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000582 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000583
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000584 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000585
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000586 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000587
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000588#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000589 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000590 return SRE_ERROR_RECURSION_LIMIT;
591#endif
592
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000593#if defined(USE_RECURSION_LIMIT)
594 if (level > USE_RECURSION_LIMIT)
595 return SRE_ERROR_RECURSION_LIMIT;
596#endif
597
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000598 if (pattern[0] == SRE_OP_INFO) {
599 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000600 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000601 if (pattern[3] && (end - ptr) < pattern[3]) {
602 TRACE(("reject (got %d chars, need %d)\n",
603 (end - ptr), pattern[3]));
604 return 0;
605 }
606 pattern += pattern[1] + 1;
607 }
608
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000609 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000610
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000611 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000612
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 case SRE_OP_FAILURE:
614 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000615 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000616 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000617
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000618 case SRE_OP_SUCCESS:
619 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000620 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000621 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000622 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000623
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000624 case SRE_OP_AT:
625 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000626 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000627 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000628 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000629 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000630 pattern++;
631 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000632
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000633 case SRE_OP_CATEGORY:
634 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000635 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000636 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000637 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000638 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000639 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000640 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000641 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000642
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000643 case SRE_OP_LITERAL:
644 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000645 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000646 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000647 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000648 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000649 pattern++;
650 ptr++;
651 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000652
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000653 case SRE_OP_NOT_LITERAL:
654 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000655 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000656 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000657 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000658 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000659 pattern++;
660 ptr++;
661 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000662
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000663 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000664 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000665 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000666 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000667 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
668 return 0;
669 ptr++;
670 break;
671
672 case SRE_OP_ANY_ALL:
673 /* match anything */
674 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000675 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000677 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 ptr++;
679 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000680
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000681 case SRE_OP_IN:
682 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000684 TRACE(("|%p|%p|IN\n", pattern, ptr));
685 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000686 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000687 pattern += pattern[0];
688 ptr++;
689 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000690
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000691 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000692 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000693 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000694 i = pattern[0];
695 {
696 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
697 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
698 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000699 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000700 while (p < e) {
701 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000702 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000703 p++; ptr++;
704 }
705 }
706 pattern++;
707 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000708
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000709 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000711 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000712 i = pattern[0];
713 {
714 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
715 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
716 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000717 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000718 while (p < e) {
719 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000720 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000721 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000722 p++; ptr++;
723 }
724 }
725 pattern++;
726 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000727
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000728 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000729 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000730 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000731 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000732 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000733 pattern++;
734 ptr++;
735 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000736
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000737 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000738 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000740 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000741 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 pattern++;
743 ptr++;
744 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000745
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000747 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000749 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000750 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000751 pattern += pattern[0];
752 ptr++;
753 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 case SRE_OP_MARK:
756 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000757 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000758 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000759 i = pattern[0];
760 if (i & 1)
761 state->lastindex = i/2 + 1;
762 if (i > state->lastmark)
763 state->lastmark = i;
764 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000765 pattern++;
766 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000767
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 case SRE_OP_JUMP:
769 case SRE_OP_INFO:
770 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000771 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000772 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000773 pattern += pattern[0];
774 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000775
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000776 case SRE_OP_ASSERT:
777 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000778 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000779 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000781 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000782 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000783 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000784 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000785 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000786 pattern += pattern[0];
787 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000788
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000789 case SRE_OP_ASSERT_NOT:
790 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000791 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000792 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000794 if (state->ptr >= state->beginning) {
795 i = SRE_MATCH(state, pattern + 2, level + 1);
796 if (i < 0)
797 return i;
798 if (i)
799 return 0;
800 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000801 pattern += pattern[0];
802 break;
803
804 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000805 /* alternation */
806 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000807 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000808 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000809 for (; pattern[0]; pattern += pattern[0]) {
810 if (pattern[1] == SRE_OP_LITERAL &&
811 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
812 continue;
813 if (pattern[1] == SRE_OP_IN &&
814 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
815 continue;
816 state->ptr = ptr;
817 i = SRE_MATCH(state, pattern + 1, level + 1);
818 if (i)
819 return i;
820 if (state->lastmark > lastmark) {
821 memset(
822 state->mark + lastmark + 1, 0,
823 (state->lastmark - lastmark) * sizeof(void*)
824 );
825 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000826 }
827 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000828 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000829
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000830 case SRE_OP_REPEAT_ONE:
831 /* match repeated sequence (maximizing regexp) */
832
833 /* this operator only works if the repeated item is
834 exactly one character wide, and we're not already
835 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000836 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000837
838 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
839
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000840 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000841 pattern[1], pattern[2]));
842
Fredrik Lundhe1869832000-08-01 22:47:49 +0000843 if (ptr + pattern[1] > end)
844 return 0; /* cannot match */
845
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000846 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000847
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000848 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
849 if (count < 0)
850 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000851
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000852 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000853
854 /* when we arrive here, count contains the number of
855 matches, and ptr points to the tail of the target
856 string. check if the rest of the pattern matches,
857 and backtrack if not. */
858
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000859 if (count < (int) pattern[1])
860 return 0;
861
862 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
863 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000864 state->ptr = ptr;
865 return 1;
866
867 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
868 /* tail starts with a literal. skip positions where
869 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000870 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000871 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000872 while (count >= (int) pattern[1] &&
873 (ptr >= end || *ptr != chr)) {
874 ptr--;
875 count--;
876 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000877 if (count < (int) pattern[1])
878 break;
879 state->ptr = ptr;
880 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000881 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000882 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000883 ptr--;
884 count--;
885 }
886
887 } else {
888 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000889 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000890 while (count >= (int) pattern[1]) {
891 state->ptr = ptr;
892 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000893 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000894 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000895 ptr--;
896 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000897 if (state->lastmark > lastmark) {
898 memset(
899 state->mark + lastmark + 1, 0,
900 (state->lastmark - lastmark) * sizeof(void*)
901 );
902 state->lastmark = lastmark;
903 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000904 }
905 }
906 return 0;
907
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000908 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000909 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +0000910 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000911 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000912 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000913 pattern[1], pattern[2]));
914
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000915 rep.count = -1;
916 rep.pattern = pattern;
917
918 /* install new repeat context */
919 rep.prev = state->repeat;
920 state->repeat = &rep;
921
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000922 state->ptr = ptr;
923 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000924
925 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000926
927 return i;
928
929 case SRE_OP_MAX_UNTIL:
930 /* maximizing repeat */
931 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
932
933 /* FIXME: we probably need to deal with zero-width
934 matches in here... */
935
936 rp = state->repeat;
937 if (!rp)
938 return SRE_ERROR_STATE;
939
940 state->ptr = ptr;
941
942 count = rp->count + 1;
943
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000944 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000945
946 if (count < rp->pattern[1]) {
947 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000948 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000949 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000950 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000951 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000952 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000953 rp->count = count - 1;
954 state->ptr = ptr;
955 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000956 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000957
958 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000959 /* we may have enough matches, but if we can
960 match another item, do so */
961 rp->count = count;
962 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000963 i = mark_save(state, 0, lastmark);
964 if (i < 0)
965 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000966 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000967 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000968 if (i)
969 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000970 i = mark_restore(state, 0, lastmark);
971 if (i < 0)
972 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000973 rp->count = count - 1;
974 state->ptr = ptr;
975 }
976
977 /* cannot match more repeated items here. make sure the
978 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000979 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000980 i = SRE_MATCH(state, pattern, level + 1);
981 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000982 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000983 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +0000984 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000985 return 0;
986
987 case SRE_OP_MIN_UNTIL:
988 /* minimizing repeat */
989 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
990
991 rp = state->repeat;
992 if (!rp)
993 return SRE_ERROR_STATE;
994
995 count = rp->count + 1;
996
Fredrik Lundh770617b2001-01-14 15:06:11 +0000997 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
998 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000999
1000 state->ptr = ptr;
1001
1002 if (count < rp->pattern[1]) {
1003 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001004 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001005 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001006 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001007 if (i)
1008 return i;
1009 rp->count = count-1;
1010 state->ptr = ptr;
1011 return 0;
1012 }
1013
1014 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001015 state->repeat = rp->prev;
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001016 if (rp->pattern[2] == 65535) {
1017 /* unbounded repeat */
1018 for (;;) {
1019 i = SRE_MATCH(state, pattern, level + 1);
1020 if (i || ptr >= end)
1021 break;
1022 state->ptr = ++ptr;
1023 }
1024 } else
1025 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001026 if (i) {
1027 /* free(rp); */
1028 return i;
1029 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001030
Fredrik Lundh770617b2001-01-14 15:06:11 +00001031 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001032 state->repeat = rp;
1033
1034 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1035 return 0;
1036
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001037 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001038 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001039 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001040 if (i)
1041 return i;
1042 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001043 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001044 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001045
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001046 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001047 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001048 return SRE_ERROR_ILLEGAL;
1049 }
1050 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001051
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001052 /* shouldn't end up here */
1053 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001054}
1055
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001056LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001057SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1058{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001059 SRE_CHAR* ptr = state->start;
1060 SRE_CHAR* end = state->end;
1061 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001062 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001063 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001064 SRE_CODE* prefix = NULL;
1065 SRE_CODE* charset = NULL;
1066 SRE_CODE* overlap = NULL;
1067 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001068
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001069 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001070 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001071 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001072
1073 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001074
1075 if (pattern[3] > 0) {
1076 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001077 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001078 end -= pattern[3]-1;
1079 if (end <= ptr)
1080 end = ptr+1;
1081 }
1082
Fredrik Lundh3562f112000-07-02 12:00:07 +00001083 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001084 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001085 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001086 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001087 prefix_skip = pattern[6];
1088 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001089 overlap = prefix + prefix_len - 1;
1090 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001091 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001092 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001093 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001094
1095 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001096 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001097
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001098 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1099 TRACE(("charset = %p\n", charset));
1100
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001101#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001102 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001103 /* pattern starts with a known prefix. use the overlap
1104 table to skip forward as fast as we possibly can */
1105 int i = 0;
1106 end = state->end;
1107 while (ptr < end) {
1108 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001109 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001110 if (!i)
1111 break;
1112 else
1113 i = overlap[i];
1114 } else {
1115 if (++i == prefix_len) {
1116 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001117 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1118 state->start = ptr + 1 - prefix_len;
1119 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001120 if (flags & SRE_INFO_LITERAL)
1121 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001122 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001123 if (status != 0)
1124 return status;
1125 /* close but no cigar -- try again */
1126 i = overlap[i];
1127 }
1128 break;
1129 }
1130
1131 }
1132 ptr++;
1133 }
1134 return 0;
1135 }
1136#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001137
Fredrik Lundh3562f112000-07-02 12:00:07 +00001138 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001139 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001140 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001141 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001142 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001143 for (;;) {
1144 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1145 ptr++;
1146 if (ptr == end)
1147 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001148 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001149 state->start = ptr;
1150 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001151 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001152 if (status != 0)
1153 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001154 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001155 } else if (charset) {
1156 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001157 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001158 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001159 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001160 ptr++;
1161 if (ptr == end)
1162 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001163 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001164 state->start = ptr;
1165 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001166 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001167 if (status != 0)
1168 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001169 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001170 }
1171 } else
1172 /* general case */
1173 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001174 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001175 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001176 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001177 if (status != 0)
1178 break;
1179 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001180
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001181 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001182}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001183
Guido van Rossumb700df92000-03-31 14:59:30 +00001184
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001185#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001186
1187/* -------------------------------------------------------------------- */
1188/* factories and destructors */
1189
1190/* see sre.h for object declarations */
1191
1192staticforward PyTypeObject Pattern_Type;
1193staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001194staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001195
1196static PyObject *
1197_compile(PyObject* self_, PyObject* args)
1198{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001199 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001200
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001201 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001202 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001203
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001204 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001205 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001206 PyObject* code;
1207 int groups = 0;
1208 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001209 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001210 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001211 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001212 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001213
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001214 code = PySequence_Fast(code, "code argument must be a sequence");
1215 if (!code)
1216 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001217
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001218#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001219 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001220#else
1221 n = PySequence_Length(code);
1222#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001223
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001224 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001225 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001226 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001227 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001228 }
1229
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001230 for (i = 0; i < n; i++) {
1231 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001232 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001233 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001234
1235 Py_DECREF(code);
1236
1237 if (PyErr_Occurred())
1238 return NULL;
1239
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001240 Py_INCREF(pattern);
1241 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001242
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001243 self->flags = flags;
1244
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001245 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001246
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001247 Py_XINCREF(groupindex);
1248 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001249
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001250 Py_XINCREF(indexgroup);
1251 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001252
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001253 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001254}
1255
1256static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001257sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001258{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001259 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001260}
1261
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001262static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001263sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001264{
1265 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001266 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001267 return NULL;
1268 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001269 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001270#if defined(HAVE_UNICODE)
1271 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001272 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001273#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001274 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001275}
1276
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001277LOCAL(void)
1278state_reset(SRE_STATE* state)
1279{
1280 int i;
1281
1282 state->lastmark = 0;
1283
1284 /* FIXME: dynamic! */
1285 for (i = 0; i < SRE_MARK_SIZE; i++)
1286 state->mark[i] = NULL;
1287
1288 state->lastindex = -1;
1289
1290 state->repeat = NULL;
1291
1292 mark_fini(state);
1293}
1294
Guido van Rossumb700df92000-03-31 14:59:30 +00001295LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001296state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1297 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001298{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001299 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001300
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001301 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001302 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001303 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001304
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001305 memset(state, 0, sizeof(SRE_STATE));
1306
1307 state->lastindex = -1;
1308
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001309#if defined(HAVE_UNICODE)
1310 if (PyUnicode_Check(string)) {
1311 /* unicode strings doesn't always support the buffer interface */
1312 ptr = (void*) PyUnicode_AS_DATA(string);
1313 bytes = PyUnicode_GET_DATA_SIZE(string);
1314 size = PyUnicode_GET_SIZE(string);
1315 state->charsize = sizeof(Py_UNICODE);
1316
1317 } else {
1318#endif
1319
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001320 /* get pointer to string buffer */
1321 buffer = string->ob_type->tp_as_buffer;
1322 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1323 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001324 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001325 return NULL;
1326 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001327
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001328 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001329 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1330 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001331 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1332 return NULL;
1333 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001334
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001335 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001336#if PY_VERSION_HEX >= 0x01060000
1337 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001338#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001339 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001340#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001341
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001342 if (PyString_Check(string) || bytes == size)
1343 state->charsize = 1;
1344#if defined(HAVE_UNICODE)
1345 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1346 state->charsize = sizeof(Py_UNICODE);
1347#endif
1348 else {
1349 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1350 return NULL;
1351 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001352
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001353#if defined(HAVE_UNICODE)
1354 }
1355#endif
1356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 /* adjust boundaries */
1358 if (start < 0)
1359 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001360 else if (start > size)
1361 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001362
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001363 if (end < 0)
1364 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001365 else if (end > size)
1366 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001367
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001368 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001369
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001370 state->start = (void*) ((char*) ptr + start * state->charsize);
1371 state->end = (void*) ((char*) ptr + end * state->charsize);
1372
1373 Py_INCREF(string);
1374 state->string = string;
1375 state->pos = start;
1376 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001377
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001378 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001379 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001380#if defined(HAVE_UNICODE)
1381 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001382 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001383#endif
1384 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001385 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001387 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001388}
1389
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001390LOCAL(void)
1391state_fini(SRE_STATE* state)
1392{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001393 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001394 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001395}
1396
1397LOCAL(PyObject*)
1398state_getslice(SRE_STATE* state, int index, PyObject* string)
1399{
Fredrik Lundh58100642000-08-09 09:14:35 +00001400 int i, j;
1401
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001402 index = (index - 1) * 2;
1403
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001404 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001405 i = j = 0;
1406 } else {
1407 i = ((char*)state->mark[index] - (char*)state->beginning) /
1408 state->charsize;
1409 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1410 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001411 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001412
Fredrik Lundh58100642000-08-09 09:14:35 +00001413 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001414}
1415
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001416static void
1417pattern_error(int status)
1418{
1419 switch (status) {
1420 case SRE_ERROR_RECURSION_LIMIT:
1421 PyErr_SetString(
1422 PyExc_RuntimeError,
1423 "maximum recursion limit exceeded"
1424 );
1425 break;
1426 case SRE_ERROR_MEMORY:
1427 PyErr_NoMemory();
1428 break;
1429 default:
1430 /* other error codes indicate compiler/engine bugs */
1431 PyErr_SetString(
1432 PyExc_RuntimeError,
1433 "internal error in regular expression engine"
1434 );
1435 }
1436}
1437
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001438static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001439pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001440{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001441 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001442
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001443 MatchObject* match;
1444 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001445 char* base;
1446 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001447
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001448 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001449
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001450 /* create match object (with room for extra group marks) */
1451 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001452 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001453 if (!match)
1454 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001455
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001456 Py_INCREF(pattern);
1457 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001458
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001459 Py_INCREF(state->string);
1460 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001461
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001462 match->regs = NULL;
1463 match->groups = pattern->groups+1;
1464
1465 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001466
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001467 base = (char*) state->beginning;
1468 n = state->charsize;
1469
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001470 match->mark[0] = ((char*) state->start - base) / n;
1471 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001473 for (i = j = 0; i < pattern->groups; i++, j+=2)
1474 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1475 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1476 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1477 } else
1478 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1479
1480 match->pos = state->pos;
1481 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001482
Fredrik Lundh6f013982000-07-03 18:44:21 +00001483 match->lastindex = state->lastindex;
1484
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001485 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001486
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001487 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001488
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001489 /* no match */
1490 Py_INCREF(Py_None);
1491 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001492
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001493 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001494
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001495 /* internal error */
1496 pattern_error(status);
1497 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001498}
1499
1500static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001501pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001502{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001503 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001504
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001505 ScannerObject* self;
1506
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001507 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001508 int start = 0;
1509 int end = INT_MAX;
1510 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1511 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001512
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001513 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001514 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001515 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001516 return NULL;
1517
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001518 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001519 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001520 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001521 return NULL;
1522 }
1523
1524 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001525 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001526
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001527 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001528}
1529
Guido van Rossumb700df92000-03-31 14:59:30 +00001530static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001531pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001532{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001533 Py_XDECREF(self->pattern);
1534 Py_XDECREF(self->groupindex);
1535 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001536}
1537
1538static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001539pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001540{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001541 SRE_STATE state;
1542 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001543
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001544 PyObject* string;
1545 int start = 0;
1546 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001547 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1548 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1549 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001550 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001551
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001552 string = state_init(&state, self, string, start, end);
1553 if (!string)
1554 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001555
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001556 state.ptr = state.start;
1557
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001558 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1559
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001560 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001561 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001562 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001563#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001564 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001565#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001566 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001567
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001568 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1569
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001570 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001571
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001573}
1574
1575static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001576pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001577{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001578 SRE_STATE state;
1579 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001580
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001581 PyObject* string;
1582 int start = 0;
1583 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001584 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1585 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1586 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001587 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001588
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001589 string = state_init(&state, self, string, start, end);
1590 if (!string)
1591 return NULL;
1592
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001593 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1594
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001595 if (state.charsize == 1) {
1596 status = sre_search(&state, PatternObject_GetCode(self));
1597 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001598#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001599 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001600#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001601 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001602
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001603 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001605 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001606
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001607 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001608}
1609
1610static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001611call(char* function, PyObject* args)
1612{
1613 PyObject* name;
1614 PyObject* module;
1615 PyObject* func;
1616 PyObject* result;
1617
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001618 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001619 if (!name)
1620 return NULL;
1621 module = PyImport_Import(name);
1622 Py_DECREF(name);
1623 if (!module)
1624 return NULL;
1625 func = PyObject_GetAttrString(module, function);
1626 Py_DECREF(module);
1627 if (!func)
1628 return NULL;
1629 result = PyObject_CallObject(func, args);
1630 Py_DECREF(func);
1631 Py_DECREF(args);
1632 return result;
1633}
1634
1635static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001636pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001637{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001638 PyObject* template;
1639 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001640 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001641 static char* kwlist[] = { "repl", "string", "count", NULL };
1642 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1643 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001644 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001645
1646 /* delegate to Python code */
1647 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1648}
1649
1650static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001651pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001652{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001653 PyObject* template;
1654 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001655 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001656 static char* kwlist[] = { "repl", "string", "count", NULL };
1657 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1658 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001659 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001660
1661 /* delegate to Python code */
1662 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1663}
1664
1665static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001666pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001667{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001668 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001669 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001670 static char* kwlist[] = { "source", "maxsplit", NULL };
1671 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1672 &string, &maxsplit))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001673 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001674
1675 /* delegate to Python code */
1676 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1677}
1678
1679static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001680pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001681{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001682 SRE_STATE state;
1683 PyObject* list;
1684 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001685 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001686
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001687 PyObject* string;
1688 int start = 0;
1689 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001690 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1691 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1692 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001694
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001695 string = state_init(&state, self, string, start, end);
1696 if (!string)
1697 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001698
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001699 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001700
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001701 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001702
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001703 PyObject* item;
1704
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001705 state_reset(&state);
1706
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001707 state.ptr = state.start;
1708
1709 if (state.charsize == 1) {
1710 status = sre_search(&state, PatternObject_GetCode(self));
1711 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001712#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001713 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001714#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001715 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001716
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001717 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001718
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001719 /* don't bother to build a match object */
1720 switch (self->groups) {
1721 case 0:
1722 item = PySequence_GetSlice(
1723 string,
1724 ((char*) state.start - (char*) state.beginning) /
1725 state.charsize,
1726 ((char*) state.ptr - (char*) state.beginning) /
1727 state.charsize);
1728 if (!item)
1729 goto error;
1730 break;
1731 case 1:
1732 item = state_getslice(&state, 1, string);
1733 if (!item)
1734 goto error;
1735 break;
1736 default:
1737 item = PyTuple_New(self->groups);
1738 if (!item)
1739 goto error;
1740 for (i = 0; i < self->groups; i++) {
1741 PyObject* o = state_getslice(&state, i+1, string);
1742 if (!o) {
1743 Py_DECREF(item);
1744 goto error;
1745 }
1746 PyTuple_SET_ITEM(item, i, o);
1747 }
1748 break;
1749 }
1750
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001751 status = PyList_Append(list, item);
1752 Py_DECREF(item);
1753
1754 if (status < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001755 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001756
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001757 if (state.ptr == state.start)
1758 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001759 else
1760 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001761
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001762 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001763
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001764 if (status == 0)
1765 break;
1766
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001767 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001768 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001769
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001770 }
1771 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001772
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001773 state_fini(&state);
1774 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001775
1776error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001777 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001778 state_fini(&state);
1779 return NULL;
1780
Guido van Rossumb700df92000-03-31 14:59:30 +00001781}
1782
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001783static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00001784 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1785 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1786 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1787 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1788 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1789 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001790 /* experimental */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001791 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001792 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001793};
1794
1795static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001796pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001797{
1798 PyObject* res;
1799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001800 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001801
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001802 if (res)
1803 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001804
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001805 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001806
1807 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001808 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001809 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001810 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001811 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001812
1813 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001814 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001815
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001816 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001817 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001818
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001819 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001820 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001821 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001822 }
1823
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001824 PyErr_SetString(PyExc_AttributeError, name);
1825 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001826}
1827
1828statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001829 PyObject_HEAD_INIT(NULL)
1830 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001831 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001832 (destructor)pattern_dealloc, /*tp_dealloc*/
1833 0, /*tp_print*/
1834 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001835};
1836
1837/* -------------------------------------------------------------------- */
1838/* match methods */
1839
1840static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001841match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001842{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001843 Py_XDECREF(self->regs);
1844 Py_XDECREF(self->string);
1845 Py_DECREF(self->pattern);
1846 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001847}
1848
1849static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001850match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001851{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001852 if (index < 0 || index >= self->groups) {
1853 /* raise IndexError if we were given a bad group number */
1854 PyErr_SetString(
1855 PyExc_IndexError,
1856 "no such group"
1857 );
1858 return NULL;
1859 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001860
Fredrik Lundh6f013982000-07-03 18:44:21 +00001861 index *= 2;
1862
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001863 if (self->string == Py_None || self->mark[index] < 0) {
1864 /* return default value if the string or group is undefined */
1865 Py_INCREF(def);
1866 return def;
1867 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001868
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001869 return PySequence_GetSlice(
1870 self->string, self->mark[index], self->mark[index+1]
1871 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001872}
1873
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001874static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001875match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001876{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001877 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001878
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001879 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001880 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001881
Fredrik Lundh6f013982000-07-03 18:44:21 +00001882 i = -1;
1883
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001884 if (self->pattern->groupindex) {
1885 index = PyObject_GetItem(self->pattern->groupindex, index);
1886 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001887 if (PyInt_Check(index))
1888 i = (int) PyInt_AS_LONG(index);
1889 Py_DECREF(index);
1890 } else
1891 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001892 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001893
1894 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001895}
1896
1897static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001898match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001899{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001900 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001901}
1902
1903static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001904match_expand(MatchObject* self, PyObject* args)
1905{
1906 PyObject* template;
1907 if (!PyArg_ParseTuple(args, "O:expand", &template))
1908 return NULL;
1909
1910 /* delegate to Python code */
1911 return call(
1912 "_expand",
1913 Py_BuildValue("OOO", self->pattern, self, template)
1914 );
1915}
1916
1917static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001918match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001919{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001920 PyObject* result;
1921 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001922
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001923 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001924
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001925 switch (size) {
1926 case 0:
1927 result = match_getslice(self, Py_False, Py_None);
1928 break;
1929 case 1:
1930 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1931 break;
1932 default:
1933 /* fetch multiple items */
1934 result = PyTuple_New(size);
1935 if (!result)
1936 return NULL;
1937 for (i = 0; i < size; i++) {
1938 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001939 self, PyTuple_GET_ITEM(args, i), Py_None
1940 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001941 if (!item) {
1942 Py_DECREF(result);
1943 return NULL;
1944 }
1945 PyTuple_SET_ITEM(result, i, item);
1946 }
1947 break;
1948 }
1949 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001950}
1951
1952static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001953match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001954{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001955 PyObject* result;
1956 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001957
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001958 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001959 static char* kwlist[] = { "default", NULL };
1960 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001961 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001962
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001963 result = PyTuple_New(self->groups-1);
1964 if (!result)
1965 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001966
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001967 for (index = 1; index < self->groups; index++) {
1968 PyObject* item;
1969 item = match_getslice_by_index(self, index, def);
1970 if (!item) {
1971 Py_DECREF(result);
1972 return NULL;
1973 }
1974 PyTuple_SET_ITEM(result, index-1, item);
1975 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001976
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001977 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001978}
1979
1980static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001981match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001982{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001983 PyObject* result;
1984 PyObject* keys;
1985 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001986
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001987 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001988 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00001989 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001990 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001991
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001992 result = PyDict_New();
1993 if (!result || !self->pattern->groupindex)
1994 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001995
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001996 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00001997 if (!keys)
1998 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00001999
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002000 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002001 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002002 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002003 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002004 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002005 if (!key)
2006 goto failed;
2007 value = match_getslice(self, key, def);
2008 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002009 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002010 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002011 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002012 status = PyDict_SetItem(result, key, value);
2013 Py_DECREF(value);
2014 if (status < 0)
2015 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002016 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002017
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002018 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002019
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002020 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002021
2022failed:
2023 Py_DECREF(keys);
2024 Py_DECREF(result);
2025 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002026}
2027
2028static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002029match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002030{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002031 int index;
2032
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002033 PyObject* index_ = Py_False; /* zero */
2034 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2035 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002036
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002037 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002038
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002039 if (index < 0 || index >= self->groups) {
2040 PyErr_SetString(
2041 PyExc_IndexError,
2042 "no such group"
2043 );
2044 return NULL;
2045 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002046
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002047 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002048 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002049}
2050
2051static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002052match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002053{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002054 int index;
2055
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002056 PyObject* index_ = Py_False; /* zero */
2057 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2058 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002059
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002060 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002062 if (index < 0 || index >= self->groups) {
2063 PyErr_SetString(
2064 PyExc_IndexError,
2065 "no such group"
2066 );
2067 return NULL;
2068 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002069
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002070 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002071 return Py_BuildValue("i", self->mark[index*2+1]);
2072}
2073
2074LOCAL(PyObject*)
2075_pair(int i1, int i2)
2076{
2077 PyObject* pair;
2078 PyObject* item;
2079
2080 pair = PyTuple_New(2);
2081 if (!pair)
2082 return NULL;
2083
2084 item = PyInt_FromLong(i1);
2085 if (!item)
2086 goto error;
2087 PyTuple_SET_ITEM(pair, 0, item);
2088
2089 item = PyInt_FromLong(i2);
2090 if (!item)
2091 goto error;
2092 PyTuple_SET_ITEM(pair, 1, item);
2093
2094 return pair;
2095
2096 error:
2097 Py_DECREF(pair);
2098 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002099}
2100
2101static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002102match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002103{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002104 int index;
2105
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002106 PyObject* index_ = Py_False; /* zero */
2107 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2108 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002109
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002110 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002111
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002112 if (index < 0 || index >= self->groups) {
2113 PyErr_SetString(
2114 PyExc_IndexError,
2115 "no such group"
2116 );
2117 return NULL;
2118 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002119
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002120 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 return _pair(self->mark[index*2], self->mark[index*2+1]);
2122}
2123
2124static PyObject*
2125match_regs(MatchObject* self)
2126{
2127 PyObject* regs;
2128 PyObject* item;
2129 int index;
2130
2131 regs = PyTuple_New(self->groups);
2132 if (!regs)
2133 return NULL;
2134
2135 for (index = 0; index < self->groups; index++) {
2136 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2137 if (!item) {
2138 Py_DECREF(regs);
2139 return NULL;
2140 }
2141 PyTuple_SET_ITEM(regs, index, item);
2142 }
2143
2144 Py_INCREF(regs);
2145 self->regs = regs;
2146
2147 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002148}
2149
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002150static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002151 {"group", (PyCFunction) match_group, METH_VARARGS},
2152 {"start", (PyCFunction) match_start, METH_VARARGS},
2153 {"end", (PyCFunction) match_end, METH_VARARGS},
2154 {"span", (PyCFunction) match_span, METH_VARARGS},
2155 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2156 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2157 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002158 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002159};
2160
2161static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002162match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002163{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002164 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002165
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002166 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2167 if (res)
2168 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002169
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002170 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002171
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002172 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002173 if (self->lastindex >= 0)
2174 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002175 Py_INCREF(Py_None);
2176 return Py_None;
2177 }
2178
2179 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002180 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002181 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002182 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002183 );
2184 if (result)
2185 return result;
2186 PyErr_Clear();
2187 }
2188 Py_INCREF(Py_None);
2189 return Py_None;
2190 }
2191
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002192 if (!strcmp(name, "string")) {
2193 if (self->string) {
2194 Py_INCREF(self->string);
2195 return self->string;
2196 } else {
2197 Py_INCREF(Py_None);
2198 return Py_None;
2199 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002200 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002201
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002202 if (!strcmp(name, "regs")) {
2203 if (self->regs) {
2204 Py_INCREF(self->regs);
2205 return self->regs;
2206 } else
2207 return match_regs(self);
2208 }
2209
2210 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002211 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002212 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002213 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002214
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002215 if (!strcmp(name, "pos"))
2216 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002217
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002218 if (!strcmp(name, "endpos"))
2219 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002220
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002221 PyErr_SetString(PyExc_AttributeError, name);
2222 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002223}
2224
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2227
2228statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002229 PyObject_HEAD_INIT(NULL)
2230 0, "SRE_Match",
2231 sizeof(MatchObject), sizeof(int),
2232 (destructor)match_dealloc, /*tp_dealloc*/
2233 0, /*tp_print*/
2234 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002235};
2236
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002237/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002238/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002239
2240static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002241scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002242{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002243 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002244 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002245 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002246}
2247
2248static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002249scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002250{
2251 SRE_STATE* state = &self->state;
2252 PyObject* match;
2253 int status;
2254
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002255 state_reset(state);
2256
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002257 state->ptr = state->start;
2258
2259 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002260 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002261 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002262#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002263 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002264#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002265 }
2266
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002267 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002268 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002269
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002270 if (status == 0 || state->ptr == state->start)
2271 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002272 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002273 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002274
2275 return match;
2276}
2277
2278
2279static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002280scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002281{
2282 SRE_STATE* state = &self->state;
2283 PyObject* match;
2284 int status;
2285
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002286 state_reset(state);
2287
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002288 state->ptr = state->start;
2289
2290 if (state->charsize == 1) {
2291 status = sre_search(state, PatternObject_GetCode(self->pattern));
2292 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002293#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002294 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002295#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002296 }
2297
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002298 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002299 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002300
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002301 if (status == 0 || state->ptr == state->start)
2302 state->start = (void*) ((char*) state->ptr + state->charsize);
2303 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002304 state->start = state->ptr;
2305
2306 return match;
2307}
2308
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002309static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002310 {"match", (PyCFunction) scanner_match, 0},
2311 {"search", (PyCFunction) scanner_search, 0},
2312 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002313};
2314
2315static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002316scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002317{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002318 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002319
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002320 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2321 if (res)
2322 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002323
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002324 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002325
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002326 /* attributes */
2327 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002328 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002329 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002330 }
2331
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002332 PyErr_SetString(PyExc_AttributeError, name);
2333 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002334}
2335
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002336statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002337 PyObject_HEAD_INIT(NULL)
2338 0, "SRE_Scanner",
2339 sizeof(ScannerObject), 0,
2340 (destructor)scanner_dealloc, /*tp_dealloc*/
2341 0, /*tp_print*/
2342 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002343};
2344
Guido van Rossumb700df92000-03-31 14:59:30 +00002345static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002346 {"compile", _compile, 1},
2347 {"getcodesize", sre_codesize, 1},
2348 {"getlower", sre_getlower, 1},
2349 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002350};
2351
2352void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002353#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002354__declspec(dllexport)
2355#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002356init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002357{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002358 PyObject* m;
2359 PyObject* d;
2360
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002361 /* Patch object types */
2362 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002363 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002364
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002365 m = Py_InitModule("_" MODULE, _functions);
2366 d = PyModule_GetDict(m);
2367
2368 PyDict_SetItemString(
2369 d, "MAGIC", (PyObject*) PyInt_FromLong(SRE_MAGIC)
2370 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002371}
2372
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002373#endif /* !defined(SRE_RECURSIVE) */