blob: 954547f553d2f206f23915e8a67ecdbc49e20f92 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00007 * 1999-10-24 fl created (based on existing template matcher code)
Fredrik Lundhebc37b22000-10-28 19:30:41 +00008 * 2000-03-06 fl first alpha, sort of
9 * 2000-06-30 fl added fast search optimization
10 * 2000-06-30 fl added assert (lookahead) primitives, etc
11 * 2000-07-02 fl added charset optimizations, etc
Fredrik Lundh5644b7f2000-09-21 17:03:25 +000012 * 2000-07-03 fl store code in pattern object, lookbehind, etc
13 * 2000-07-08 fl added regs attribute
Fredrik Lundhebc37b22000-10-28 19:30:41 +000014 * 2000-07-21 fl reset lastindex in scanner methods
15 * 2000-08-01 fl fixes for 1.6b1
Fredrik Lundh5644b7f2000-09-21 17:03:25 +000016 * 2000-08-03 fl added recursion limit
17 * 2000-08-07 fl use PyOS_CheckStack() if available
18 * 2000-08-08 fl changed findall to return empty strings instead of None
19 * 2000-08-27 fl properly propagate memory errors
20 * 2000-09-02 fl return -1 instead of None for start/end/span
21 * 2000-09-20 fl added expand method
22 * 2000-09-21 fl don't use the buffer interface for unicode strings
Fredrik Lundh562586e2000-10-03 20:43:34 +000023 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
Fredrik Lundhebc37b22000-10-28 19:30:41 +000024 * 2000-10-24 fl really fixed assert_not; reset groups in findall
Guido van Rossumb700df92000-03-31 14:59:30 +000025 *
26 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
27 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000028 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
31 *
Guido van Rossumb700df92000-03-31 14:59:30 +000032 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000033 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000034 * other compatibility work.
35 */
36
37#ifndef SRE_RECURSIVE
38
Fredrik Lundhebc37b22000-10-28 19:30:41 +000039char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
41#include "Python.h"
42
43#include "sre.h"
44
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000045#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000046
Fredrik Lundh436c3d582000-06-29 08:58:44 +000047/* name of this module, minus the leading underscore */
48#define MODULE "sre"
49
Guido van Rossumb700df92000-03-31 14:59:30 +000050/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000051#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000052
Fredrik Lundh436c3d582000-06-29 08:58:44 +000053#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000054/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000055#define HAVE_UNICODE
56#endif
57
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000058/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000059/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060
Fredrik Lundh33accc12000-08-27 20:59:47 +000061/* prevent run-away recursion (bad patterns on long strings) */
62
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000063#if !defined(USE_STACKCHECK)
Fredrik Lundh33accc12000-08-27 20:59:47 +000064#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
65/* require smaller recursion limit for a number of 64-bit platforms:
66 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
67/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
68#define USE_RECURSION_LIMIT 7500
69#else
70#define USE_RECURSION_LIMIT 10000
71#endif
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000072#endif
Fredrik Lundh96ab4652000-08-03 16:29:50 +000073
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000074/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000075#define USE_FAST_SEARCH
76
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000077/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000078#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000079
80/* -------------------------------------------------------------------- */
81
Fredrik Lundh80946112000-06-29 18:03:25 +000082#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000083#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000084#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000085/* fastest possible local call under MSVC */
86#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000087#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000088#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000089#else
90#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000091#endif
92
93/* error codes */
94#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000095#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000096#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000097#define SRE_ERROR_MEMORY -9 /* out of memory */
98
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000099#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000100#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000101#else
102#define TRACE(v)
103#endif
104
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000105/* -------------------------------------------------------------------- */
106/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000107
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000108/* default character predicates (run sre_chars.py to regenerate tables) */
109
110#define SRE_DIGIT_MASK 1
111#define SRE_SPACE_MASK 2
112#define SRE_LINEBREAK_MASK 4
113#define SRE_ALNUM_MASK 8
114#define SRE_WORD_MASK 16
115
/* Character class flags for the 128 ASCII codes.  Each entry is an OR of
   the SRE_*_MASK bits: 25 = digit|alnum|word, 24 = alnum|word,
   16 = word only ('_'), 6 = space|linebreak ('\n'), 2 = space. */
static char sre_char_info[128] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /* 0x60 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
123
/* ASCII lower-casing table: 'A'..'Z' (65..90) map to 'a'..'z' (97..122);
   every other code maps to itself. */
static char sre_char_lower[128] = {
    /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /* 0x40 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
               93, 94, 95,
    /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
               124, 125, 126, 127
};

/* Lower-case CH using the ASCII table above.  Codes of 128 and up are
   outside the table and are returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return sre_char_lower[ch];
}
138
/* Test CH against one flag of sre_char_info.  Yields the masked flag value
   (non-zero when set) for codes below 128, and 0 for everything else. */
#define SRE_ASCII_TEST(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & mask) : 0)

#define SRE_IS_DIGIT(ch) SRE_ASCII_TEST(ch, SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch) SRE_ASCII_TEST(ch, SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_ASCII_TEST(ch, SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch) SRE_ASCII_TEST(ch, SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch) SRE_ASCII_TEST(ch, SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000149
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000150/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000151
/* Lower-case CH with the C library's tolower() (which follows the current
   locale).  Codes of 256 and up are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return tolower((ch));
}
/* Locale-aware predicates built on <ctype.h>.  Codes of 256 and up are
   never passed to the ctype functions and always test false.  A line
   break is the '\n' character only; a word character is anything
   isalnum() accepts, plus '_'. */
#define SRE_LOC_IS_DIGIT(ch) ((ch) >= 256 ? 0 : isdigit((ch)))
#define SRE_LOC_IS_SPACE(ch) ((ch) >= 256 ? 0 : isspace((ch)))
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) >= 256 ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
161
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000162/* unicode-specific character predicates */
163
#ifdef HAVE_UNICODE
/* Lower-case CH via Py_UNICODE_TOLOWER, after narrowing it to Py_UNICODE. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    Py_UNICODE uch = (Py_UNICODE)(ch);

    return (unsigned int) Py_UNICODE_TOLOWER(uch);
}

/* Unicode predicates: thin wrappers over the Py_UNICODE_IS* macros.  A
   word character is anything Py_UNICODE_ISALNUM accepts, plus '_'. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif /* HAVE_UNICODE */
175
Guido van Rossumb700df92000-03-31 14:59:30 +0000176LOCAL(int)
177sre_category(SRE_CODE category, unsigned int ch)
178{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000180
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000181 case SRE_CATEGORY_DIGIT:
182 return SRE_IS_DIGIT(ch);
183 case SRE_CATEGORY_NOT_DIGIT:
184 return !SRE_IS_DIGIT(ch);
185 case SRE_CATEGORY_SPACE:
186 return SRE_IS_SPACE(ch);
187 case SRE_CATEGORY_NOT_SPACE:
188 return !SRE_IS_SPACE(ch);
189 case SRE_CATEGORY_WORD:
190 return SRE_IS_WORD(ch);
191 case SRE_CATEGORY_NOT_WORD:
192 return !SRE_IS_WORD(ch);
193 case SRE_CATEGORY_LINEBREAK:
194 return SRE_IS_LINEBREAK(ch);
195 case SRE_CATEGORY_NOT_LINEBREAK:
196 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000197
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 case SRE_CATEGORY_LOC_WORD:
199 return SRE_LOC_IS_WORD(ch);
200 case SRE_CATEGORY_LOC_NOT_WORD:
201 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000202
203#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000204 case SRE_CATEGORY_UNI_DIGIT:
205 return SRE_UNI_IS_DIGIT(ch);
206 case SRE_CATEGORY_UNI_NOT_DIGIT:
207 return !SRE_UNI_IS_DIGIT(ch);
208 case SRE_CATEGORY_UNI_SPACE:
209 return SRE_UNI_IS_SPACE(ch);
210 case SRE_CATEGORY_UNI_NOT_SPACE:
211 return !SRE_UNI_IS_SPACE(ch);
212 case SRE_CATEGORY_UNI_WORD:
213 return SRE_UNI_IS_WORD(ch);
214 case SRE_CATEGORY_UNI_NOT_WORD:
215 return !SRE_UNI_IS_WORD(ch);
216 case SRE_CATEGORY_UNI_LINEBREAK:
217 return SRE_UNI_IS_LINEBREAK(ch);
218 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
219 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000220#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000221 }
222 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000223}
224
225/* helpers */
226
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000227static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000228mark_fini(SRE_STATE* state)
229{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000230 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000231 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000232 state->mark_stack = NULL;
233 }
234 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000235}
236
237static int
238mark_save(SRE_STATE* state, int lo, int hi)
239{
240 void* stack;
241 int size;
242 int minsize, newsize;
243
244 if (hi <= lo)
245 return 0;
246
247 size = (hi - lo) + 1;
248
249 newsize = state->mark_stack_size;
250 minsize = state->mark_stack_base + size;
251
252 if (newsize < minsize) {
253 /* create new stack */
254 if (!newsize) {
255 newsize = 512;
256 if (newsize < minsize)
257 newsize = minsize;
258 TRACE(("allocate stack %d\n", newsize));
259 stack = malloc(sizeof(void*) * newsize);
260 } else {
261 /* grow the stack */
262 while (newsize < minsize)
263 newsize += newsize;
264 TRACE(("grow stack to %d\n", newsize));
265 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
266 }
267 if (!stack) {
268 mark_fini(state);
269 return SRE_ERROR_MEMORY;
270 }
271 state->mark_stack = stack;
272 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000273 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000274
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000275 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000276
277 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
278 size * sizeof(void*));
279
280 state->mark_stack_base += size;
281
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000282 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000283}
284
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000285static int
286mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000287{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000288 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000289
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000290 if (hi <= lo)
291 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000292
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000293 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000294
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000295 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000296
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000297 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000298
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000299 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
300 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000301
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000302 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000303}
304
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000305/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000306
307#define SRE_CHAR unsigned char
308#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000309#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000310#define SRE_CHARSET sre_charset
311#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000312#define SRE_MATCH sre_match
313#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000314
315#if defined(HAVE_UNICODE)
316
Guido van Rossumb700df92000-03-31 14:59:30 +0000317#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000318#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000319#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000320
Guido van Rossumb700df92000-03-31 14:59:30 +0000321#undef SRE_SEARCH
322#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000323#undef SRE_INFO
324#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000325#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000326#undef SRE_AT
327#undef SRE_CHAR
328
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000329/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000330
331#define SRE_CHAR Py_UNICODE
332#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000333#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000334#define SRE_CHARSET sre_ucharset
335#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000336#define SRE_MATCH sre_umatch
337#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000338#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000339
340#endif /* SRE_RECURSIVE */
341
342/* -------------------------------------------------------------------- */
343/* String matching engine */
344
345/* the following section is compiled twice, with different character
346 settings */
347
348LOCAL(int)
349SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
350{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000351 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000354
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000355 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000357 case SRE_AT_BEGINNING:
358 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000359
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000360 case SRE_AT_BEGINNING_LINE:
361 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000362 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000363
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000364 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000365 return (((void*) (ptr+1) == state->end &&
366 SRE_IS_LINEBREAK((int) ptr[0])) ||
367 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000368
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000369 case SRE_AT_END_LINE:
370 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000371 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000372
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000373 case SRE_AT_BOUNDARY:
374 if (state->beginning == state->end)
375 return 0;
376 that = ((void*) ptr > state->beginning) ?
377 SRE_IS_WORD((int) ptr[-1]) : 0;
378 this = ((void*) ptr < state->end) ?
379 SRE_IS_WORD((int) ptr[0]) : 0;
380 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000382 case SRE_AT_NON_BOUNDARY:
383 if (state->beginning == state->end)
384 return 0;
385 that = ((void*) ptr > state->beginning) ?
386 SRE_IS_WORD((int) ptr[-1]) : 0;
387 this = ((void*) ptr < state->end) ?
388 SRE_IS_WORD((int) ptr[0]) : 0;
389 return this == that;
390 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000391
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000392 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000393}
394
395LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000396SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000397{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000398 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000399
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000400 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 for (;;) {
403 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000404
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000405 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000406 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 if (ch == set[0])
408 return ok;
409 set++;
410 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000412 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000413 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000414 if (set[0] <= ch && ch <= set[1])
415 return ok;
416 set += 2;
417 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000418
Fredrik Lundh3562f112000-07-02 12:00:07 +0000419 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000420 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000421 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
422 return ok;
423 set += 16;
424 break;
425
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000426 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000427 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000428 if (sre_category(set[0], (int) ch))
429 return ok;
430 set += 1;
431 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000432
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000433 case SRE_OP_NEGATE:
434 ok = !ok;
435 break;
436
437 case SRE_OP_FAILURE:
438 return !ok;
439
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000440 default:
441 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000442 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000443 return 0;
444 }
445 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000446}
447
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000448LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
449
450LOCAL(int)
451SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
452{
453 SRE_CODE chr;
454 SRE_CHAR* ptr = state->ptr;
455 SRE_CHAR* end = state->end;
456 int i;
457
458 /* adjust end */
459 if (maxcount < end - ptr && maxcount != 65535)
460 end = ptr + maxcount;
461
462 switch (pattern[0]) {
463
464 case SRE_OP_ANY:
465 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000466 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000467 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
468 ptr++;
469 break;
470
471 case SRE_OP_ANY_ALL:
472 /* repeated dot wildcare. skip to the end of the target
473 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000474 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000475 ptr = end;
476 break;
477
478 case SRE_OP_LITERAL:
479 /* repeated literal */
480 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000481 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000482 while (ptr < end && (SRE_CODE) *ptr == chr)
483 ptr++;
484 break;
485
486 case SRE_OP_LITERAL_IGNORE:
487 /* repeated literal */
488 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000489 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000490 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
491 ptr++;
492 break;
493
494 case SRE_OP_NOT_LITERAL:
495 /* repeated non-literal */
496 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000497 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000498 while (ptr < end && (SRE_CODE) *ptr != chr)
499 ptr++;
500 break;
501
502 case SRE_OP_NOT_LITERAL_IGNORE:
503 /* repeated non-literal */
504 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000505 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000506 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
507 ptr++;
508 break;
509
510 case SRE_OP_IN:
511 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000512 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
513 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000514 ptr++;
515 break;
516
517 default:
518 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000519 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000520 while ((SRE_CHAR*) state->ptr < end) {
521 i = SRE_MATCH(state, pattern, level);
522 if (i < 0)
523 return i;
524 if (!i)
525 break;
526 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000527 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
528 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000529 return (SRE_CHAR*) state->ptr - ptr;
530 }
531
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000532 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000533 return ptr - (SRE_CHAR*) state->ptr;
534}
535
#if 0 /* not used in this release */
/* Check whether an SRE_OP_INFO block matches at the current position.
   Returns the number of SRE_CODE items to skip on success, or 0 if
   there is no match. */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    SRE_CHAR* text = state->ptr;
    SRE_CHAR* stop = state->end;
    int k;

    /* not enough characters left for the minimal length */
    if (pattern[3] && (stop - text) < pattern[3])
        return 0;

    /* no usable known prefix: just skip the block */
    if (!(pattern[2] & SRE_INFO_PREFIX) || pattern[5] <= 1)
        return pattern[0];

    /* <length> <skip> <prefix data> <overlap data> */
    for (k = 0; k < pattern[5]; k++) {
        if ((SRE_CODE) text[k] != pattern[7 + k])
            return 0;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000563
564LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000565SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000566{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000567 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000568 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000569
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000570 SRE_CHAR* end = state->end;
571 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000572 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000573 SRE_REPEAT* rp;
574 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000575 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000576
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000577 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000578
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000579 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000580
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000581#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000582 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000583 return SRE_ERROR_RECURSION_LIMIT;
584#endif
585
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000586#if defined(USE_RECURSION_LIMIT)
587 if (level > USE_RECURSION_LIMIT)
588 return SRE_ERROR_RECURSION_LIMIT;
589#endif
590
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000591 if (pattern[0] == SRE_OP_INFO) {
592 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000593 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000594 if (pattern[3] && (end - ptr) < pattern[3]) {
595 TRACE(("reject (got %d chars, need %d)\n",
596 (end - ptr), pattern[3]));
597 return 0;
598 }
599 pattern += pattern[1] + 1;
600 }
601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000602 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000603
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000604 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 case SRE_OP_FAILURE:
607 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000608 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000609 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000610
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000611 case SRE_OP_SUCCESS:
612 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000613 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000614 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000615 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000616
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000617 case SRE_OP_AT:
618 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000619 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000620 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000621 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000622 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000623 pattern++;
624 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000625
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000626 case SRE_OP_CATEGORY:
627 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000628 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000629 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000630 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000631 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000632 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000633 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000634 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000635
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000636 case SRE_OP_LITERAL:
637 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000638 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000639 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000640 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000641 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000642 pattern++;
643 ptr++;
644 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000645
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000646 case SRE_OP_NOT_LITERAL:
647 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000648 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000649 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000650 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000651 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000652 pattern++;
653 ptr++;
654 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000655
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000656 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000657 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000658 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000659 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000660 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
661 return 0;
662 ptr++;
663 break;
664
665 case SRE_OP_ANY_ALL:
666 /* match anything */
667 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000668 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000669 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000670 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000671 ptr++;
672 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000673
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674 case SRE_OP_IN:
675 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000676 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000677 TRACE(("|%p|%p|IN\n", pattern, ptr));
678 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000679 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000680 pattern += pattern[0];
681 ptr++;
682 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000683
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000684 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000685 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000686 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000687 i = pattern[0];
688 {
689 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
690 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
691 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000692 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000693 while (p < e) {
694 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000695 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000696 p++; ptr++;
697 }
698 }
699 pattern++;
700 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000701
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000702 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000703 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000704 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000705 i = pattern[0];
706 {
707 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
708 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
709 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000710 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000711 while (p < e) {
712 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000713 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000714 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000715 p++; ptr++;
716 }
717 }
718 pattern++;
719 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000720
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000721 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000722 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000723 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000724 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000725 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000726 pattern++;
727 ptr++;
728 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000729
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000730 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000731 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000733 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000734 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000735 pattern++;
736 ptr++;
737 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000738
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000740 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000742 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000743 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000744 pattern += pattern[0];
745 ptr++;
746 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000747
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 case SRE_OP_MARK:
749 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000750 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000751 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000752 i = pattern[0];
753 if (i & 1)
754 state->lastindex = i/2 + 1;
755 if (i > state->lastmark)
756 state->lastmark = i;
757 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000758 pattern++;
759 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000760
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000761 case SRE_OP_JUMP:
762 case SRE_OP_INFO:
763 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000764 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000765 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000766 pattern += pattern[0];
767 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000768
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000769 case SRE_OP_ASSERT:
770 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000771 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000772 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000773 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000774 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000775 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000776 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000777 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000778 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000779 pattern += pattern[0];
780 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000781
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000782 case SRE_OP_ASSERT_NOT:
783 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000784 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000785 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000786 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000787 if (state->ptr >= state->beginning) {
788 i = SRE_MATCH(state, pattern + 2, level + 1);
789 if (i < 0)
790 return i;
791 if (i)
792 return 0;
793 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000794 pattern += pattern[0];
795 break;
796
797 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000798 /* alternation */
799 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000800 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000801 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000802 for (; pattern[0]; pattern += pattern[0]) {
803 if (pattern[1] == SRE_OP_LITERAL &&
804 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
805 continue;
806 if (pattern[1] == SRE_OP_IN &&
807 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
808 continue;
809 state->ptr = ptr;
810 i = SRE_MATCH(state, pattern + 1, level + 1);
811 if (i)
812 return i;
813 if (state->lastmark > lastmark) {
814 memset(
815 state->mark + lastmark + 1, 0,
816 (state->lastmark - lastmark) * sizeof(void*)
817 );
818 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000819 }
820 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000821 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000822
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000823 case SRE_OP_REPEAT_ONE:
824 /* match repeated sequence (maximizing regexp) */
825
826 /* this operator only works if the repeated item is
827 exactly one character wide, and we're not already
828 collecting backtracking points. for other cases,
829 use the MAX_REPEAT operator instead */
830
831 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
832
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000833 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000834 pattern[1], pattern[2]));
835
Fredrik Lundhe1869832000-08-01 22:47:49 +0000836 if (ptr + pattern[1] > end)
837 return 0; /* cannot match */
838
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000839 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000840
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000841 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
842 if (count < 0)
843 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000844
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000845 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000846
847 /* when we arrive here, count contains the number of
848 matches, and ptr points to the tail of the target
849 string. check if the rest of the pattern matches,
850 and backtrack if not. */
851
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000852 if (count < (int) pattern[1])
853 return 0;
854
855 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
856 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000857 state->ptr = ptr;
858 return 1;
859
860 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
861 /* tail starts with a literal. skip positions where
862 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000863 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000864 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000865 while (count >= (int) pattern[1] &&
866 (ptr >= end || *ptr != chr)) {
867 ptr--;
868 count--;
869 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000870 if (count < (int) pattern[1])
871 break;
872 state->ptr = ptr;
873 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000874 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000875 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000876 ptr--;
877 count--;
878 }
879
880 } else {
881 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000882 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000883 while (count >= (int) pattern[1]) {
884 state->ptr = ptr;
885 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000886 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000887 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000888 ptr--;
889 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000890 if (state->lastmark > lastmark) {
891 memset(
892 state->mark + lastmark + 1, 0,
893 (state->lastmark - lastmark) * sizeof(void*)
894 );
895 state->lastmark = lastmark;
896 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000897 }
898 }
899 return 0;
900
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000901 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000902 /* create repeat context. all the hard work is done
903 by the UNTIL operator */
904 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000905 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000906 pattern[1], pattern[2]));
907
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000908 rep.count = -1;
909 rep.pattern = pattern;
910
911 /* install new repeat context */
912 rep.prev = state->repeat;
913 state->repeat = &rep;
914
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000915 state->ptr = ptr;
916 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000917
918 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000919
920 return i;
921
922 case SRE_OP_MAX_UNTIL:
923 /* maximizing repeat */
924 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
925
926 /* FIXME: we probably need to deal with zero-width
927 matches in here... */
928
929 rp = state->repeat;
930 if (!rp)
931 return SRE_ERROR_STATE;
932
933 state->ptr = ptr;
934
935 count = rp->count + 1;
936
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000937 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000938
939 if (count < rp->pattern[1]) {
940 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000941 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000942 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000943 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000944 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000945 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000946 rp->count = count - 1;
947 state->ptr = ptr;
948 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000949 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000950
951 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000952 /* we may have enough matches, but if we can
953 match another item, do so */
954 rp->count = count;
955 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000956 i = mark_save(state, 0, lastmark);
957 if (i < 0)
958 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000959 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000960 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000961 if (i)
962 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000963 i = mark_restore(state, 0, lastmark);
964 if (i < 0)
965 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000966 rp->count = count - 1;
967 state->ptr = ptr;
968 }
969
970 /* cannot match more repeated items here. make sure the
971 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000972 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000973 i = SRE_MATCH(state, pattern, level + 1);
974 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000975 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000976 state->repeat = rp;
977 return 0;
978
979 case SRE_OP_MIN_UNTIL:
980 /* minimizing repeat */
981 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
982
983 rp = state->repeat;
984 if (!rp)
985 return SRE_ERROR_STATE;
986
987 count = rp->count + 1;
988
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000989 TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000990
991 state->ptr = ptr;
992
993 if (count < rp->pattern[1]) {
994 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000995 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000996 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000997 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000998 if (i)
999 return i;
1000 rp->count = count-1;
1001 state->ptr = ptr;
1002 return 0;
1003 }
1004
1005 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001006 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001007 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001008 if (i) {
1009 /* free(rp); */
1010 return i;
1011 }
1012 state->repeat = rp;
1013
1014 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1015 return 0;
1016
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001017 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001018 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001019 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001020 if (i)
1021 return i;
1022 rp->count = count - 1;
1023 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001024
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001025 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001026 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001027 return SRE_ERROR_ILLEGAL;
1028 }
1029 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001030
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001031 /* shouldn't end up here */
1032 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001033}
1034
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001035LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001036SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1037{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001038 SRE_CHAR* ptr = state->start;
1039 SRE_CHAR* end = state->end;
1040 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001041 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001042 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001043 SRE_CODE* prefix = NULL;
1044 SRE_CODE* charset = NULL;
1045 SRE_CODE* overlap = NULL;
1046 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001047
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001048 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001049 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001050 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001051
1052 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001053
1054 if (pattern[3] > 0) {
1055 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001056 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001057 end -= pattern[3]-1;
1058 if (end <= ptr)
1059 end = ptr+1;
1060 }
1061
Fredrik Lundh3562f112000-07-02 12:00:07 +00001062 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001063 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001064 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001065 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001066 prefix_skip = pattern[6];
1067 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001068 overlap = prefix + prefix_len - 1;
1069 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001070 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001071 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001072 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001073
1074 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001075 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001076
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001077 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1078 TRACE(("charset = %p\n", charset));
1079
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001080#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001081 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001082 /* pattern starts with a known prefix. use the overlap
1083 table to skip forward as fast as we possibly can */
1084 int i = 0;
1085 end = state->end;
1086 while (ptr < end) {
1087 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001088 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001089 if (!i)
1090 break;
1091 else
1092 i = overlap[i];
1093 } else {
1094 if (++i == prefix_len) {
1095 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001096 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1097 state->start = ptr + 1 - prefix_len;
1098 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001099 if (flags & SRE_INFO_LITERAL)
1100 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001101 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001102 if (status != 0)
1103 return status;
1104 /* close but no cigar -- try again */
1105 i = overlap[i];
1106 }
1107 break;
1108 }
1109
1110 }
1111 ptr++;
1112 }
1113 return 0;
1114 }
1115#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001116
Fredrik Lundh3562f112000-07-02 12:00:07 +00001117 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001118 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001119 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001120 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001121 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001122 for (;;) {
1123 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1124 ptr++;
1125 if (ptr == end)
1126 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001127 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001128 state->start = ptr;
1129 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001130 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001131 if (status != 0)
1132 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001133 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001134 } else if (charset) {
1135 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001136 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001137 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001138 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001139 ptr++;
1140 if (ptr == end)
1141 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001142 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001143 state->start = ptr;
1144 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001145 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001146 if (status != 0)
1147 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001148 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001149 }
1150 } else
1151 /* general case */
1152 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001153 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001154 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001155 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001156 if (status != 0)
1157 break;
1158 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001160 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001161}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001162
Guido van Rossumb700df92000-03-31 14:59:30 +00001163
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001164#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001165
1166/* -------------------------------------------------------------------- */
1167/* factories and destructors */
1168
1169/* see sre.h for object declarations */
1170
1171staticforward PyTypeObject Pattern_Type;
1172staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001173staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001174
1175static PyObject *
1176_compile(PyObject* self_, PyObject* args)
1177{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001178 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001179
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001180 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001181 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001182
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001183 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001184 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001185 PyObject* code;
1186 int groups = 0;
1187 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001188 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001189 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001190 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001191 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001192
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001193 code = PySequence_Fast(code, "code argument must be a sequence");
1194 if (!code)
1195 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001196
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001197#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001198 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001199#else
1200 n = PySequence_Length(code);
1201#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001202
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001203 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001204 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001205 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001206 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001207 }
1208
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001209 for (i = 0; i < n; i++) {
1210 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001211 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001212 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001213
1214 Py_DECREF(code);
1215
1216 if (PyErr_Occurred())
1217 return NULL;
1218
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001219 Py_INCREF(pattern);
1220 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001221
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001222 self->flags = flags;
1223
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001224 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001225
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001226 Py_XINCREF(groupindex);
1227 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001228
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001229 Py_XINCREF(indexgroup);
1230 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001232 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001233}
1234
1235static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001236sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001237{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001238 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001239}
1240
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001241static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001242sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001243{
1244 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001245 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001246 return NULL;
1247 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001248 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001249#if defined(HAVE_UNICODE)
1250 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001251 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001252#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001253 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001254}
1255
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001256LOCAL(void)
1257state_reset(SRE_STATE* state)
1258{
1259 int i;
1260
1261 state->lastmark = 0;
1262
1263 /* FIXME: dynamic! */
1264 for (i = 0; i < SRE_MARK_SIZE; i++)
1265 state->mark[i] = NULL;
1266
1267 state->lastindex = -1;
1268
1269 state->repeat = NULL;
1270
1271 mark_fini(state);
1272}
1273
Guido van Rossumb700df92000-03-31 14:59:30 +00001274LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001275state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1276 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001277{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001278 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001279
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001280 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001281 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001282 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001283
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001284 memset(state, 0, sizeof(SRE_STATE));
1285
1286 state->lastindex = -1;
1287
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001288#if defined(HAVE_UNICODE)
1289 if (PyUnicode_Check(string)) {
1290 /* unicode strings doesn't always support the buffer interface */
1291 ptr = (void*) PyUnicode_AS_DATA(string);
1292 bytes = PyUnicode_GET_DATA_SIZE(string);
1293 size = PyUnicode_GET_SIZE(string);
1294 state->charsize = sizeof(Py_UNICODE);
1295
1296 } else {
1297#endif
1298
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001299 /* get pointer to string buffer */
1300 buffer = string->ob_type->tp_as_buffer;
1301 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1302 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001303 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001304 return NULL;
1305 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001306
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001307 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001308 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1309 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001310 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1311 return NULL;
1312 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001313
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001314 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001315#if PY_VERSION_HEX >= 0x01060000
1316 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001317#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001318 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001319#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001320
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001321 if (PyString_Check(string) || bytes == size)
1322 state->charsize = 1;
1323#if defined(HAVE_UNICODE)
1324 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1325 state->charsize = sizeof(Py_UNICODE);
1326#endif
1327 else {
1328 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1329 return NULL;
1330 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001331
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001332#if defined(HAVE_UNICODE)
1333 }
1334#endif
1335
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001336 /* adjust boundaries */
1337 if (start < 0)
1338 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001339 else if (start > size)
1340 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001341
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001342 if (end < 0)
1343 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001344 else if (end > size)
1345 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001346
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001347 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001348
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001349 state->start = (void*) ((char*) ptr + start * state->charsize);
1350 state->end = (void*) ((char*) ptr + end * state->charsize);
1351
1352 Py_INCREF(string);
1353 state->string = string;
1354 state->pos = start;
1355 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001356
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001357 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001358 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001359#if defined(HAVE_UNICODE)
1360 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001361 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001362#endif
1363 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001364 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001365
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001366 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001367}
1368
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001369LOCAL(void)
1370state_fini(SRE_STATE* state)
1371{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001372 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001373 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001374}
1375
1376LOCAL(PyObject*)
1377state_getslice(SRE_STATE* state, int index, PyObject* string)
1378{
Fredrik Lundh58100642000-08-09 09:14:35 +00001379 int i, j;
1380
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001381 index = (index - 1) * 2;
1382
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001383 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001384 i = j = 0;
1385 } else {
1386 i = ((char*)state->mark[index] - (char*)state->beginning) /
1387 state->charsize;
1388 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1389 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001390 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001391
Fredrik Lundh58100642000-08-09 09:14:35 +00001392 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001393}
1394
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001395static void
1396pattern_error(int status)
1397{
1398 switch (status) {
1399 case SRE_ERROR_RECURSION_LIMIT:
1400 PyErr_SetString(
1401 PyExc_RuntimeError,
1402 "maximum recursion limit exceeded"
1403 );
1404 break;
1405 case SRE_ERROR_MEMORY:
1406 PyErr_NoMemory();
1407 break;
1408 default:
1409 /* other error codes indicate compiler/engine bugs */
1410 PyErr_SetString(
1411 PyExc_RuntimeError,
1412 "internal error in regular expression engine"
1413 );
1414 }
1415}
1416
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001417static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001418pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001419{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001420 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001421
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001422 MatchObject* match;
1423 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001424 char* base;
1425 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001427 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001429 /* create match object (with room for extra group marks) */
1430 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001431 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001432 if (!match)
1433 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001434
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001435 Py_INCREF(pattern);
1436 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001437
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001438 Py_INCREF(state->string);
1439 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001440
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001441 match->regs = NULL;
1442 match->groups = pattern->groups+1;
1443
1444 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001445
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001446 base = (char*) state->beginning;
1447 n = state->charsize;
1448
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001449 match->mark[0] = ((char*) state->start - base) / n;
1450 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001451
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001452 for (i = j = 0; i < pattern->groups; i++, j+=2)
1453 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1454 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1455 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1456 } else
1457 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1458
1459 match->pos = state->pos;
1460 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001461
Fredrik Lundh6f013982000-07-03 18:44:21 +00001462 match->lastindex = state->lastindex;
1463
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001464 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001465
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001466 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001467
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001468 /* no match */
1469 Py_INCREF(Py_None);
1470 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001471
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001472 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001473
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001474 /* internal error */
1475 pattern_error(status);
1476 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001477}
1478
1479static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001480pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001481{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001482 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001483
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001484 ScannerObject* self;
1485
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001486 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001487 int start = 0;
1488 int end = INT_MAX;
1489 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1490 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001491
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001492 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001493 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001494 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001495 return NULL;
1496
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001497 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001498 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001499 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001500 return NULL;
1501 }
1502
1503 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001504 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001505
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001506 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001507}
1508
Guido van Rossumb700df92000-03-31 14:59:30 +00001509static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001510pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001511{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001512 Py_XDECREF(self->pattern);
1513 Py_XDECREF(self->groupindex);
1514 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001515}
1516
1517static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001518pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001519{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001520 SRE_STATE state;
1521 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001522
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001523 PyObject* string;
1524 int start = 0;
1525 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001526 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1527 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1528 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001529 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001530
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001531 string = state_init(&state, self, string, start, end);
1532 if (!string)
1533 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001534
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001535 state.ptr = state.start;
1536
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001537 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1538
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001539 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001540 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001541 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001542#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001543 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001544#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001545 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001546
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001547 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1548
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001549 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001550
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001551 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001552}
1553
1554static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001555pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001556{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001557 SRE_STATE state;
1558 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001559
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001560 PyObject* string;
1561 int start = 0;
1562 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001563 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1564 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1565 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001566 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001567
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001568 string = state_init(&state, self, string, start, end);
1569 if (!string)
1570 return NULL;
1571
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001572 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1573
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001574 if (state.charsize == 1) {
1575 status = sre_search(&state, PatternObject_GetCode(self));
1576 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001577#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001578 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001579#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001580 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001581
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001582 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1583
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001584 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001585
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001586 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001587}
1588
1589static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001590call(char* function, PyObject* args)
1591{
1592 PyObject* name;
1593 PyObject* module;
1594 PyObject* func;
1595 PyObject* result;
1596
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001597 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001598 if (!name)
1599 return NULL;
1600 module = PyImport_Import(name);
1601 Py_DECREF(name);
1602 if (!module)
1603 return NULL;
1604 func = PyObject_GetAttrString(module, function);
1605 Py_DECREF(module);
1606 if (!func)
1607 return NULL;
1608 result = PyObject_CallObject(func, args);
1609 Py_DECREF(func);
1610 Py_DECREF(args);
1611 return result;
1612}
1613
1614static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001615pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001616{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001617 PyObject* template;
1618 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001619 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001620 static char* kwlist[] = { "repl", "string", "count", NULL };
1621 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1622 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001623 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001624
1625 /* delegate to Python code */
1626 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1627}
1628
1629static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001630pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001631{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001632 PyObject* template;
1633 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001634 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001635 static char* kwlist[] = { "repl", "string", "count", NULL };
1636 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1637 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001638 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001639
1640 /* delegate to Python code */
1641 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1642}
1643
1644static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001645pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001646{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001647 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001648 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001649 static char* kwlist[] = { "source", "maxsplit", NULL };
1650 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1651 &string, &maxsplit))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001652 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001653
1654 /* delegate to Python code */
1655 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1656}
1657
1658static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001659pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001660{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001661 SRE_STATE state;
1662 PyObject* list;
1663 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001664 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001665
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001666 PyObject* string;
1667 int start = 0;
1668 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001669 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1670 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1671 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001672 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001673
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001674 string = state_init(&state, self, string, start, end);
1675 if (!string)
1676 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001677
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001678 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001679
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001680 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001681
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001682 PyObject* item;
1683
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001684 state_reset(&state);
1685
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001686 state.ptr = state.start;
1687
1688 if (state.charsize == 1) {
1689 status = sre_search(&state, PatternObject_GetCode(self));
1690 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001691#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001692 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001693#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001694 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001695
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001696 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001697
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001698 /* don't bother to build a match object */
1699 switch (self->groups) {
1700 case 0:
1701 item = PySequence_GetSlice(
1702 string,
1703 ((char*) state.start - (char*) state.beginning) /
1704 state.charsize,
1705 ((char*) state.ptr - (char*) state.beginning) /
1706 state.charsize);
1707 if (!item)
1708 goto error;
1709 break;
1710 case 1:
1711 item = state_getslice(&state, 1, string);
1712 if (!item)
1713 goto error;
1714 break;
1715 default:
1716 item = PyTuple_New(self->groups);
1717 if (!item)
1718 goto error;
1719 for (i = 0; i < self->groups; i++) {
1720 PyObject* o = state_getslice(&state, i+1, string);
1721 if (!o) {
1722 Py_DECREF(item);
1723 goto error;
1724 }
1725 PyTuple_SET_ITEM(item, i, o);
1726 }
1727 break;
1728 }
1729
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001730 status = PyList_Append(list, item);
1731 Py_DECREF(item);
1732
1733 if (status < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001734 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001735
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001736 if (state.ptr == state.start)
1737 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001738 else
1739 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001740
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001741 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001742
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001743 if (status == 0)
1744 break;
1745
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001746 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001747 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001749 }
1750 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001752 state_fini(&state);
1753 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001754
1755error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001756 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001757 state_fini(&state);
1758 return NULL;
1759
Guido van Rossumb700df92000-03-31 14:59:30 +00001760}
1761
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001762static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00001763 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1764 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1765 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1766 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1767 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1768 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001769 /* experimental */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001770 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001771 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001772};
1773
1774static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001775pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001776{
1777 PyObject* res;
1778
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001779 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001780
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001781 if (res)
1782 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001783
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001784 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001785
1786 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001787 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001788 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001789 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001790 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001791
1792 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001793 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001794
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001795 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001796 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001798 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001799 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001800 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001801 }
1802
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001803 PyErr_SetString(PyExc_AttributeError, name);
1804 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001805}
1806
1807statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001808 PyObject_HEAD_INIT(NULL)
1809 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001810 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001811 (destructor)pattern_dealloc, /*tp_dealloc*/
1812 0, /*tp_print*/
1813 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001814};
1815
1816/* -------------------------------------------------------------------- */
1817/* match methods */
1818
1819static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001820match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001821{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001822 Py_XDECREF(self->regs);
1823 Py_XDECREF(self->string);
1824 Py_DECREF(self->pattern);
1825 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001826}
1827
1828static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001829match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001830{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001831 if (index < 0 || index >= self->groups) {
1832 /* raise IndexError if we were given a bad group number */
1833 PyErr_SetString(
1834 PyExc_IndexError,
1835 "no such group"
1836 );
1837 return NULL;
1838 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001839
Fredrik Lundh6f013982000-07-03 18:44:21 +00001840 index *= 2;
1841
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001842 if (self->string == Py_None || self->mark[index] < 0) {
1843 /* return default value if the string or group is undefined */
1844 Py_INCREF(def);
1845 return def;
1846 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001847
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001848 return PySequence_GetSlice(
1849 self->string, self->mark[index], self->mark[index+1]
1850 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001851}
1852
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001853static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001854match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001855{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001856 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001857
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001858 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001859 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001860
Fredrik Lundh6f013982000-07-03 18:44:21 +00001861 i = -1;
1862
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001863 if (self->pattern->groupindex) {
1864 index = PyObject_GetItem(self->pattern->groupindex, index);
1865 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001866 if (PyInt_Check(index))
1867 i = (int) PyInt_AS_LONG(index);
1868 Py_DECREF(index);
1869 } else
1870 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001871 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001872
1873 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001874}
1875
1876static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001877match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001878{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001879 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001880}
1881
1882static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001883match_expand(MatchObject* self, PyObject* args)
1884{
1885 PyObject* template;
1886 if (!PyArg_ParseTuple(args, "O:expand", &template))
1887 return NULL;
1888
1889 /* delegate to Python code */
1890 return call(
1891 "_expand",
1892 Py_BuildValue("OOO", self->pattern, self, template)
1893 );
1894}
1895
1896static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001897match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001898{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001899 PyObject* result;
1900 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001901
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001902 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001903
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001904 switch (size) {
1905 case 0:
1906 result = match_getslice(self, Py_False, Py_None);
1907 break;
1908 case 1:
1909 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1910 break;
1911 default:
1912 /* fetch multiple items */
1913 result = PyTuple_New(size);
1914 if (!result)
1915 return NULL;
1916 for (i = 0; i < size; i++) {
1917 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001918 self, PyTuple_GET_ITEM(args, i), Py_None
1919 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001920 if (!item) {
1921 Py_DECREF(result);
1922 return NULL;
1923 }
1924 PyTuple_SET_ITEM(result, i, item);
1925 }
1926 break;
1927 }
1928 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001929}
1930
1931static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001932match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001933{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 PyObject* result;
1935 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001936
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001937 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001938 static char* kwlist[] = { "default", NULL };
1939 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001940 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001941
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001942 result = PyTuple_New(self->groups-1);
1943 if (!result)
1944 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001945
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001946 for (index = 1; index < self->groups; index++) {
1947 PyObject* item;
1948 item = match_getslice_by_index(self, index, def);
1949 if (!item) {
1950 Py_DECREF(result);
1951 return NULL;
1952 }
1953 PyTuple_SET_ITEM(result, index-1, item);
1954 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001955
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001956 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001957}
1958
1959static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001960match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001961{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001962 PyObject* result;
1963 PyObject* keys;
1964 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001965
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001966 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001967 static char* kwlist[] = { "default", NULL };
1968 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001969 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001970
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001971 result = PyDict_New();
1972 if (!result || !self->pattern->groupindex)
1973 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001974
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001975 keys = PyMapping_Keys(self->pattern->groupindex);
1976 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001977 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001978 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001979 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001980
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001981 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1982 PyObject* key;
1983 PyObject* item;
1984 key = PyList_GET_ITEM(keys, index);
1985 if (!key) {
1986 Py_DECREF(keys);
1987 Py_DECREF(result);
1988 return NULL;
1989 }
1990 item = match_getslice(self, key, def);
1991 if (!item) {
1992 Py_DECREF(key);
1993 Py_DECREF(keys);
1994 Py_DECREF(result);
1995 return NULL;
1996 }
1997 /* FIXME: <fl> this can fail, right? */
1998 PyDict_SetItem(result, key, item);
1999 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002000
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002001 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002002
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002003 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002004}
2005
2006static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002007match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002008{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002009 int index;
2010
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002011 PyObject* index_ = Py_False; /* zero */
2012 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2013 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002014
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002015 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002016
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002017 if (index < 0 || index >= self->groups) {
2018 PyErr_SetString(
2019 PyExc_IndexError,
2020 "no such group"
2021 );
2022 return NULL;
2023 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002024
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002025 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002026 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002027}
2028
2029static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002030match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002031{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002032 int index;
2033
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002034 PyObject* index_ = Py_False; /* zero */
2035 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2036 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002037
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002038 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002039
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002040 if (index < 0 || index >= self->groups) {
2041 PyErr_SetString(
2042 PyExc_IndexError,
2043 "no such group"
2044 );
2045 return NULL;
2046 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002047
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002048 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002049 return Py_BuildValue("i", self->mark[index*2+1]);
2050}
2051
2052LOCAL(PyObject*)
2053_pair(int i1, int i2)
2054{
2055 PyObject* pair;
2056 PyObject* item;
2057
2058 pair = PyTuple_New(2);
2059 if (!pair)
2060 return NULL;
2061
2062 item = PyInt_FromLong(i1);
2063 if (!item)
2064 goto error;
2065 PyTuple_SET_ITEM(pair, 0, item);
2066
2067 item = PyInt_FromLong(i2);
2068 if (!item)
2069 goto error;
2070 PyTuple_SET_ITEM(pair, 1, item);
2071
2072 return pair;
2073
2074 error:
2075 Py_DECREF(pair);
2076 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002077}
2078
2079static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002080match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002081{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002082 int index;
2083
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002084 PyObject* index_ = Py_False; /* zero */
2085 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2086 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002087
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002088 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002089
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002090 if (index < 0 || index >= self->groups) {
2091 PyErr_SetString(
2092 PyExc_IndexError,
2093 "no such group"
2094 );
2095 return NULL;
2096 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002097
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002098 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002099 return _pair(self->mark[index*2], self->mark[index*2+1]);
2100}
2101
2102static PyObject*
2103match_regs(MatchObject* self)
2104{
2105 PyObject* regs;
2106 PyObject* item;
2107 int index;
2108
2109 regs = PyTuple_New(self->groups);
2110 if (!regs)
2111 return NULL;
2112
2113 for (index = 0; index < self->groups; index++) {
2114 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2115 if (!item) {
2116 Py_DECREF(regs);
2117 return NULL;
2118 }
2119 PyTuple_SET_ITEM(regs, index, item);
2120 }
2121
2122 Py_INCREF(regs);
2123 self->regs = regs;
2124
2125 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002126}
2127
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002128static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002129 {"group", (PyCFunction) match_group, METH_VARARGS},
2130 {"start", (PyCFunction) match_start, METH_VARARGS},
2131 {"end", (PyCFunction) match_end, METH_VARARGS},
2132 {"span", (PyCFunction) match_span, METH_VARARGS},
2133 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2134 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2135 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002137};
2138
2139static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002140match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002141{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002142 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002143
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2145 if (res)
2146 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002147
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002148 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002149
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002151 if (self->lastindex >= 0)
2152 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002153 Py_INCREF(Py_None);
2154 return Py_None;
2155 }
2156
2157 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002158 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002159 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002160 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002161 );
2162 if (result)
2163 return result;
2164 PyErr_Clear();
2165 }
2166 Py_INCREF(Py_None);
2167 return Py_None;
2168 }
2169
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002170 if (!strcmp(name, "string")) {
2171 if (self->string) {
2172 Py_INCREF(self->string);
2173 return self->string;
2174 } else {
2175 Py_INCREF(Py_None);
2176 return Py_None;
2177 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002178 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002179
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002180 if (!strcmp(name, "regs")) {
2181 if (self->regs) {
2182 Py_INCREF(self->regs);
2183 return self->regs;
2184 } else
2185 return match_regs(self);
2186 }
2187
2188 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002189 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002190 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002191 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002192
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002193 if (!strcmp(name, "pos"))
2194 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002195
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002196 if (!strcmp(name, "endpos"))
2197 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002198
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002199 PyErr_SetString(PyExc_AttributeError, name);
2200 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002201}
2202
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2205
2206statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 PyObject_HEAD_INIT(NULL)
2208 0, "SRE_Match",
2209 sizeof(MatchObject), sizeof(int),
2210 (destructor)match_dealloc, /*tp_dealloc*/
2211 0, /*tp_print*/
2212 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002213};
2214
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002215/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002216/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002217
2218static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002219scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002220{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002221 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002222 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002223 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002224}
2225
2226static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002227scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002228{
2229 SRE_STATE* state = &self->state;
2230 PyObject* match;
2231 int status;
2232
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002233 state_reset(state);
2234
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002235 state->ptr = state->start;
2236
2237 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002238 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002239 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002240#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002241 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002242#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002243 }
2244
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002245 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002246 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002247
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002248 if (status == 0 || state->ptr == state->start)
2249 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002250 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002251 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002252
2253 return match;
2254}
2255
2256
2257static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002258scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002259{
2260 SRE_STATE* state = &self->state;
2261 PyObject* match;
2262 int status;
2263
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002264 state_reset(state);
2265
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002266 state->ptr = state->start;
2267
2268 if (state->charsize == 1) {
2269 status = sre_search(state, PatternObject_GetCode(self->pattern));
2270 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002271#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002272 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002273#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002274 }
2275
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002276 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002277 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002278
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002279 if (status == 0 || state->ptr == state->start)
2280 state->start = (void*) ((char*) state->ptr + state->charsize);
2281 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002282 state->start = state->ptr;
2283
2284 return match;
2285}
2286
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002287static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002288 {"match", (PyCFunction) scanner_match, 0},
2289 {"search", (PyCFunction) scanner_search, 0},
2290 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002291};
2292
2293static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002294scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002295{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002296 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002297
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002298 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2299 if (res)
2300 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002301
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002302 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002303
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002304 /* attributes */
2305 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002306 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002307 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002308 }
2309
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002310 PyErr_SetString(PyExc_AttributeError, name);
2311 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002312}
2313
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002314statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002315 PyObject_HEAD_INIT(NULL)
2316 0, "SRE_Scanner",
2317 sizeof(ScannerObject), 0,
2318 (destructor)scanner_dealloc, /*tp_dealloc*/
2319 0, /*tp_print*/
2320 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002321};
2322
Guido van Rossumb700df92000-03-31 14:59:30 +00002323static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002324 {"compile", _compile, 1},
2325 {"getcodesize", sre_codesize, 1},
2326 {"getlower", sre_getlower, 1},
2327 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002328};
2329
2330void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002331#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002332__declspec(dllexport)
2333#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002334init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002335{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002336 /* Patch object types */
2337 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002338 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002339
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002340 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002341}
2342
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002343#endif /* !defined(SRE_RECURSIVE) */