/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 99-10-24 fl  created (based on existing template matcher code)
 * 00-03-06 fl  first alpha, sort of (0.5)
 * 00-06-30 fl  added fast search optimization (0.9.3)
 * 00-06-30 fl  added assert (lookahead) primitives, etc (0.9.4)
 * 00-07-02 fl  added charset optimizations, etc (0.9.5)
 * 00-07-03 fl  store code in pattern object, lookbehind, etc
 * 00-07-08 fl  added regs attribute
 * 00-07-21 fl  reset lastindex in scanner methods (0.9.6)
 * 00-08-01 fl  fixes for 1.6b1 (0.9.8)
 * 00-08-02 fl  moved SRE_COUNT out of the match method
 * 00-08-03 fl  added recursion limit
 *
 * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
29
30#ifndef SRE_RECURSIVE
31
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000032char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000033
34#include "Python.h"
35
36#include "sre.h"
37
Guido van Rossumb700df92000-03-31 14:59:30 +000038#if defined(HAVE_LIMITS_H)
39#include <limits.h>
40#else
41#define INT_MAX 2147483647
42#endif
43
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000044#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000045
Fredrik Lundh436c3d52000-06-29 08:58:44 +000046/* name of this module, minus the leading underscore */
47#define MODULE "sre"
48
Guido van Rossumb700df92000-03-31 14:59:30 +000049/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000050#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d52000-06-29 08:58:44 +000052#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000053/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +000054#define HAVE_UNICODE
55#endif
56
/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */
#define USE_RECURSION_LIMIT 10000

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* -------------------------------------------------------------------- */

/* LOCAL(type) spells the storage class / calling convention used for
   the file-private functions in this module */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
81
/* error codes (all negative, so callers can test "result < 0") */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE takes a doubly parenthesized printf argument list, e.g.
   TRACE(("%d\n", x)); it expands to nothing unless VERBOSE is defined */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* offset of ptr from the start of the target string, in SRE_CHAR units;
   expects a variable named "state" to be in scope at the point of use */
#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Guido van Rossumb700df92000-03-31 14:59:30 +000095
/* -------------------------------------------------------------------- */
/* search engine state */

/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored in sre_char_info[] */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* per-character flag bytes for code points 0..127; each entry is an OR
   of the masks above (e.g. 25 = DIGIT|ALNUM|WORD, 24 = ALNUM|WORD) */
static const char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
114
/* lower-case mapping for code points 0..127: 'A'..'Z' (65..90) map to
   'a'..'z' (97..122), every other entry maps to itself */
static const char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127 };

/* Lower-case ch using the built-in table; code points of 128 and above
   are returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch < 128)
        return (unsigned int) (unsigned char) sre_char_lower[ch];
    return ch;
}
129
/* table-driven predicates: each yields the (non-zero) mask bit when ch
   is below 128 and has the property, and 0 otherwise.  note that ch is
   evaluated more than once */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000140
/* locale-specific character predicates */

/* Lower-case ch with the C library's tolower(); code points of 256 and
   above are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    /* ch is below 256 here, so it is a valid tolower() argument */
    return ((ch) < 256 ? (unsigned int) tolower((int) (ch)) : ch);
}
/* <ctype.h>-based predicates; code points of 256 and above never match
   (except that LINEBREAK only compares against '\n').  ch is evaluated
   more than once */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
152
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)
/* Lower-case ch via Py_UNICODE_TOLOWER; ch is narrowed to Py_UNICODE
   before the lookup. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}
/* thin wrappers around the Py_UNICODE_IS* macros; WORD additionally
   accepts the underscore */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
166
Guido van Rossumb700df92000-03-31 14:59:30 +0000167LOCAL(int)
168sre_category(SRE_CODE category, unsigned int ch)
169{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000170 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000171
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000172 case SRE_CATEGORY_DIGIT:
173 return SRE_IS_DIGIT(ch);
174 case SRE_CATEGORY_NOT_DIGIT:
175 return !SRE_IS_DIGIT(ch);
176 case SRE_CATEGORY_SPACE:
177 return SRE_IS_SPACE(ch);
178 case SRE_CATEGORY_NOT_SPACE:
179 return !SRE_IS_SPACE(ch);
180 case SRE_CATEGORY_WORD:
181 return SRE_IS_WORD(ch);
182 case SRE_CATEGORY_NOT_WORD:
183 return !SRE_IS_WORD(ch);
184 case SRE_CATEGORY_LINEBREAK:
185 return SRE_IS_LINEBREAK(ch);
186 case SRE_CATEGORY_NOT_LINEBREAK:
187 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000188
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000189 case SRE_CATEGORY_LOC_WORD:
190 return SRE_LOC_IS_WORD(ch);
191 case SRE_CATEGORY_LOC_NOT_WORD:
192 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000193
194#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 case SRE_CATEGORY_UNI_DIGIT:
196 return SRE_UNI_IS_DIGIT(ch);
197 case SRE_CATEGORY_UNI_NOT_DIGIT:
198 return !SRE_UNI_IS_DIGIT(ch);
199 case SRE_CATEGORY_UNI_SPACE:
200 return SRE_UNI_IS_SPACE(ch);
201 case SRE_CATEGORY_UNI_NOT_SPACE:
202 return !SRE_UNI_IS_SPACE(ch);
203 case SRE_CATEGORY_UNI_WORD:
204 return SRE_UNI_IS_WORD(ch);
205 case SRE_CATEGORY_UNI_NOT_WORD:
206 return !SRE_UNI_IS_WORD(ch);
207 case SRE_CATEGORY_UNI_LINEBREAK:
208 return SRE_UNI_IS_LINEBREAK(ch);
209 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
210 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000211#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000212 }
213 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000214}
215
216/* helpers */
217
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000218static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000219mark_fini(SRE_STATE* state)
220{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000221 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000222 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000223 state->mark_stack = NULL;
224 }
225 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000226}
227
228static int
229mark_save(SRE_STATE* state, int lo, int hi)
230{
231 void* stack;
232 int size;
233 int minsize, newsize;
234
235 if (hi <= lo)
236 return 0;
237
238 size = (hi - lo) + 1;
239
240 newsize = state->mark_stack_size;
241 minsize = state->mark_stack_base + size;
242
243 if (newsize < minsize) {
244 /* create new stack */
245 if (!newsize) {
246 newsize = 512;
247 if (newsize < minsize)
248 newsize = minsize;
249 TRACE(("allocate stack %d\n", newsize));
250 stack = malloc(sizeof(void*) * newsize);
251 } else {
252 /* grow the stack */
253 while (newsize < minsize)
254 newsize += newsize;
255 TRACE(("grow stack to %d\n", newsize));
256 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
257 }
258 if (!stack) {
259 mark_fini(state);
260 return SRE_ERROR_MEMORY;
261 }
262 state->mark_stack = stack;
263 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000264 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000265
266 TRACE(("copy %d:%d to %d\n", lo, hi, state->mark_stack_base));
267
268 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
269 size * sizeof(void*));
270
271 state->mark_stack_base += size;
272
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000273 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000274}
275
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000276static int
277mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000278{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000279 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000280
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000281 if (hi <= lo)
282 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000283
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000284 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000285
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000286 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000287
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000288 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000289
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000290 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
291 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000292
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000293 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000294}
295
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000296/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000297
298#define SRE_CHAR unsigned char
299#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000300#define SRE_COUNT sre_count
Guido van Rossumb700df92000-03-31 14:59:30 +0000301#define SRE_MEMBER sre_member
302#define SRE_MATCH sre_match
303#define SRE_SEARCH sre_search
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000304
305#if defined(HAVE_UNICODE)
306
Guido van Rossumb700df92000-03-31 14:59:30 +0000307#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000308#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000309#undef SRE_RECURSIVE
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000310
Guido van Rossumb700df92000-03-31 14:59:30 +0000311#undef SRE_SEARCH
312#undef SRE_MATCH
313#undef SRE_MEMBER
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000314#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000315#undef SRE_AT
316#undef SRE_CHAR
317
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000318/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000319
320#define SRE_CHAR Py_UNICODE
321#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000322#define SRE_COUNT sre_ucount
Guido van Rossumb700df92000-03-31 14:59:30 +0000323#define SRE_MEMBER sre_umember
324#define SRE_MATCH sre_umatch
325#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000326#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000327
328#endif /* SRE_RECURSIVE */
329
330/* -------------------------------------------------------------------- */
331/* String matching engine */
332
333/* the following section is compiled twice, with different character
334 settings */
335
336LOCAL(int)
337SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
338{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000339 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000341 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000342
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000343 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000344
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000345 case SRE_AT_BEGINNING:
346 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000347
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000348 case SRE_AT_BEGINNING_LINE:
349 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000350 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000352 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000353 return (((void*) (ptr+1) == state->end &&
354 SRE_IS_LINEBREAK((int) ptr[0])) ||
355 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000357 case SRE_AT_END_LINE:
358 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000359 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000360
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000361 case SRE_AT_BOUNDARY:
362 if (state->beginning == state->end)
363 return 0;
364 that = ((void*) ptr > state->beginning) ?
365 SRE_IS_WORD((int) ptr[-1]) : 0;
366 this = ((void*) ptr < state->end) ?
367 SRE_IS_WORD((int) ptr[0]) : 0;
368 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000369
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000370 case SRE_AT_NON_BOUNDARY:
371 if (state->beginning == state->end)
372 return 0;
373 that = ((void*) ptr > state->beginning) ?
374 SRE_IS_WORD((int) ptr[-1]) : 0;
375 this = ((void*) ptr < state->end) ?
376 SRE_IS_WORD((int) ptr[0]) : 0;
377 return this == that;
378 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000379
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000380 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000381}
382
383LOCAL(int)
Fredrik Lundh0640e112000-06-30 13:55:15 +0000384SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000385{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000386 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000387
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000388 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000389
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000390 for (;;) {
391 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000394 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 if (ch == set[0])
396 return ok;
397 set++;
398 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000399
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000400 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000401 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 if (set[0] <= ch && ch <= set[1])
403 return ok;
404 set += 2;
405 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh3562f112000-07-02 12:00:07 +0000407 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000408 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000409 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
410 return ok;
411 set += 16;
412 break;
413
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000414 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000415 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000416 if (sre_category(set[0], (int) ch))
417 return ok;
418 set += 1;
419 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000420
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000421 case SRE_OP_NEGATE:
422 ok = !ok;
423 break;
424
425 case SRE_OP_FAILURE:
426 return !ok;
427
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000428 default:
429 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000430 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000431 return 0;
432 }
433 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000434}
435
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000436LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
437
438LOCAL(int)
439SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
440{
441 SRE_CODE chr;
442 SRE_CHAR* ptr = state->ptr;
443 SRE_CHAR* end = state->end;
444 int i;
445
446 /* adjust end */
447 if (maxcount < end - ptr && maxcount != 65535)
448 end = ptr + maxcount;
449
450 switch (pattern[0]) {
451
452 case SRE_OP_ANY:
453 /* repeated dot wildcard. */
454 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
455 ptr++;
456 break;
457
458 case SRE_OP_ANY_ALL:
459 /* repeated dot wildcare. skip to the end of the target
460 string, and backtrack from there */
461 ptr = end;
462 break;
463
464 case SRE_OP_LITERAL:
465 /* repeated literal */
466 chr = pattern[1];
467 while (ptr < end && (SRE_CODE) *ptr == chr)
468 ptr++;
469 break;
470
471 case SRE_OP_LITERAL_IGNORE:
472 /* repeated literal */
473 chr = pattern[1];
474 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
475 ptr++;
476 break;
477
478 case SRE_OP_NOT_LITERAL:
479 /* repeated non-literal */
480 chr = pattern[1];
481 while (ptr < end && (SRE_CODE) *ptr != chr)
482 ptr++;
483 break;
484
485 case SRE_OP_NOT_LITERAL_IGNORE:
486 /* repeated non-literal */
487 chr = pattern[1];
488 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
489 ptr++;
490 break;
491
492 case SRE_OP_IN:
493 /* repeated set */
494 while (ptr < end && SRE_MEMBER(pattern + 2, *ptr))
495 ptr++;
496 break;
497
498 default:
499 /* repeated single character pattern */
500 while ((SRE_CHAR*) state->ptr < end) {
501 i = SRE_MATCH(state, pattern, level);
502 if (i < 0)
503 return i;
504 if (!i)
505 break;
506 }
507 return (SRE_CHAR*) state->ptr - ptr;
508 }
509
510 return ptr - (SRE_CHAR*) state->ptr;
511}
512
Guido van Rossumb700df92000-03-31 14:59:30 +0000513LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000514SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000515{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000516 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000517 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000518
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000519 SRE_CHAR* end = state->end;
520 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000521 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000522 SRE_REPEAT* rp;
523 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000524 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000525
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000526 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000527
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000528 TRACE(("%8d: enter %d\n", PTR(ptr), level));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000529
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000530#if defined(USE_RECURSION_LIMIT)
531 if (level > USE_RECURSION_LIMIT)
532 return SRE_ERROR_RECURSION_LIMIT;
533#endif
534
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000535 if (pattern[0] == SRE_OP_INFO) {
536 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000537 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000538 if (pattern[3] && (end - ptr) < pattern[3]) {
539 TRACE(("reject (got %d chars, need %d)\n",
540 (end - ptr), pattern[3]));
541 return 0;
542 }
543 pattern += pattern[1] + 1;
544 }
545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000546 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000547
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000548 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000549
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000550 case SRE_OP_FAILURE:
551 /* immediate failure */
552 TRACE(("%8d: failure\n", PTR(ptr)));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000553 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000554
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000555 case SRE_OP_SUCCESS:
556 /* end of pattern */
557 TRACE(("%8d: success\n", PTR(ptr)));
558 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000559 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000560
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000561 case SRE_OP_AT:
562 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000563 /* <AT> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000564 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
565 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000566 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000567 pattern++;
568 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000569
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000570 case SRE_OP_CATEGORY:
571 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000572 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000573 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000574 *ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000575 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000576 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000577 TRACE(("%8d: category ok\n", PTR(ptr)));
578 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000579 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000580 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000581
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000582 case SRE_OP_LITERAL:
583 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000584 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000585 TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
586 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000587 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000588 pattern++;
589 ptr++;
590 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000591
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000592 case SRE_OP_NOT_LITERAL:
593 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000594 /* <NOT_LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000595 TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
596 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000597 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000598 pattern++;
599 ptr++;
600 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000602 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000603 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000604 /* <ANY> */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000605 TRACE(("%8d: anything (except newline)\n", PTR(ptr)));
606 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
607 return 0;
608 ptr++;
609 break;
610
611 case SRE_OP_ANY_ALL:
612 /* match anything */
613 /* <ANY_ALL> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000614 TRACE(("%8d: anything\n", PTR(ptr)));
615 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000616 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000617 ptr++;
618 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000619
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000620 case SRE_OP_IN:
621 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000622 /* <IN> <skip> <set> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000623 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
624 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000625 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000626 pattern += pattern[0];
627 ptr++;
628 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000629
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000630 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000631 /* match backreference */
632 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
633 i = pattern[0];
634 {
635 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
636 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
637 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000638 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000639 while (p < e) {
640 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000641 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000642 p++; ptr++;
643 }
644 }
645 pattern++;
646 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000647
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000648 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000649 /* match backreference */
650 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
651 i = pattern[0];
652 {
653 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
654 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
655 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000656 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000657 while (p < e) {
658 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000659 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000660 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000661 p++; ptr++;
662 }
663 }
664 pattern++;
665 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000666
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000667 case SRE_OP_LITERAL_IGNORE:
668 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
669 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000670 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000671 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000672 pattern++;
673 ptr++;
674 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 case SRE_OP_NOT_LITERAL_IGNORE:
677 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
678 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000679 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000680 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000681 pattern++;
682 ptr++;
683 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000685 case SRE_OP_IN_IGNORE:
686 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
687 if (ptr >= end
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000688 || !SRE_MEMBER(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000689 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000690 pattern += pattern[0];
691 ptr++;
692 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000693
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000694 case SRE_OP_MARK:
695 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000696 /* <MARK> <gid> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000697 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000698 i = pattern[0];
699 if (i & 1)
700 state->lastindex = i/2 + 1;
701 if (i > state->lastmark)
702 state->lastmark = i;
703 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000704 pattern++;
705 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000706
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000707 case SRE_OP_JUMP:
708 case SRE_OP_INFO:
709 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000710 /* <JUMP> <offset> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000711 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
712 pattern += pattern[0];
713 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000714
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000715 case SRE_OP_ASSERT:
716 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000717 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000718 TRACE(("%8d: assert subpattern %d\n", PTR(ptr), pattern[1]));
719 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000720 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000721 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000722 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000723 if (i <= 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000724 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000725 if (pattern[1] > 0 && state->ptr != ptr)
726 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 pattern += pattern[0];
728 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000729
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000730 case SRE_OP_ASSERT_NOT:
731 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000732 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000733 TRACE(("%8d: assert not subpattern %d\n", PTR(ptr), pattern[1]));
734 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000735 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000736 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000737 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000738 if (i < 0)
739 return i;
740 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000741 return 0;
742 if (pattern[1] > 0 && state->ptr != ptr)
743 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000744 pattern += pattern[0];
745 break;
746
747 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000748 /* alternation */
749 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000750 TRACE(("%8d: branch\n", PTR(ptr)));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000751 lastmark = state->lastmark;
752 while (pattern[0]) {
753 SRE_CODE* code = pattern+1;
754 TRACE(("%8d: try branch\n", PTR(ptr)));
755 switch (code[0]) {
756 case SRE_OP_IN:
757 if (ptr >= end || !SRE_MEMBER(code + 2, ptr[0]))
758 break;
759 code += code[1] + 1;
760 state->ptr = ptr + 1;
761 goto branch;
762 case SRE_OP_LITERAL:
763 if (ptr >= end || (SRE_CODE) ptr[0] != code[1])
764 break;
765 code += 2;
766 state->ptr = ptr + 1;
767 goto branch;
768 default:
769 state->ptr = ptr;
770 branch:
771 i = SRE_MATCH(state, code, level + 1);
772 if (i)
773 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000774 while (state->lastmark > lastmark)
775 state->mark[state->lastmark--] = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000776 }
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000777 pattern += pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000778 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000779 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000780
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000781 case SRE_OP_REPEAT_ONE:
782 /* match repeated sequence (maximizing regexp) */
783
784 /* this operator only works if the repeated item is
785 exactly one character wide, and we're not already
786 collecting backtracking points. for other cases,
787 use the MAX_REPEAT operator instead */
788
789 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
790
791 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
792 pattern[1], pattern[2]));
793
Fredrik Lundhe1869832000-08-01 22:47:49 +0000794 if (ptr + pattern[1] > end)
795 return 0; /* cannot match */
796
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000797 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000798
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000799 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
800 if (count < 0)
801 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000802
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000803 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000804
805 /* when we arrive here, count contains the number of
806 matches, and ptr points to the tail of the target
807 string. check if the rest of the pattern matches,
808 and backtrack if not. */
809
810 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
811
812 if (count < (int) pattern[1])
813 return 0;
814
815 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
816 /* tail is empty. we're finished */
817 TRACE(("%8d: tail is empty\n", PTR(ptr)));
818 state->ptr = ptr;
819 return 1;
820
821 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
822 /* tail starts with a literal. skip positions where
823 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000824 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000825 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
826 for (;;) {
827 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
828 while (count >= (int) pattern[1] &&
829 (ptr >= end || *ptr != chr)) {
830 ptr--;
831 count--;
832 }
833 TRACE(("%8d: check tail\n", PTR(ptr)));
834 if (count < (int) pattern[1])
835 break;
836 state->ptr = ptr;
837 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
838 if (i > 0) {
839 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
840 return 1;
841 }
842 ptr--;
843 count--;
844 }
845
846 } else {
847 /* general case */
848 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
849 while (count >= (int) pattern[1]) {
850 state->ptr = ptr;
851 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
852 if (i < 0)
853 return i;
854 if (i) {
855 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
856 return 1;
857 }
858 ptr--;
859 count--;
860 }
861 }
862 return 0;
863
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000864 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000865 /* create repeat context. all the hard work is done
866 by the UNTIL operator */
867 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
868 TRACE(("%8d: repeat {%d,%d}\n", PTR(ptr),
869 pattern[1], pattern[2]));
870
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000871 rep.count = -1;
872 rep.pattern = pattern;
873
874 /* install new repeat context */
875 rep.prev = state->repeat;
876 state->repeat = &rep;
877
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000878 state->ptr = ptr;
879 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000880
881 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000882
883 return i;
884
885 case SRE_OP_MAX_UNTIL:
886 /* maximizing repeat */
887 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
888
889 /* FIXME: we probably need to deal with zero-width
890 matches in here... */
891
892 rp = state->repeat;
893 if (!rp)
894 return SRE_ERROR_STATE;
895
896 state->ptr = ptr;
897
898 count = rp->count + 1;
899
900 TRACE(("%8d: max until %d\n", PTR(ptr), count));
901
902 if (count < rp->pattern[1]) {
903 /* not enough matches */
904 TRACE(("%8d: match item (required)\n", PTR(ptr)));
905 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000906 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000907 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000908 if (i)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000909 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000910 rp->count = count - 1;
911 state->ptr = ptr;
912 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000913 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000914
915 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
916 TRACE(("%8d: match item (optional)\n", PTR(ptr)));
917 /* we may have enough matches, but if we can
918 match another item, do so */
919 rp->count = count;
920 lastmark = state->lastmark;
921 mark_save(state, 0, lastmark);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000922 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000923 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000924 if (i)
925 return i;
926 mark_restore(state, 0, lastmark);
927 rp->count = count - 1;
928 state->ptr = ptr;
929 }
930
931 /* cannot match more repeated items here. make sure the
932 tail matches */
933 TRACE(("%8d: match tail\n", PTR(ptr)));
934 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000935 i = SRE_MATCH(state, pattern, level + 1);
936 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000937 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000938 state->repeat = rp;
939 return 0;
940
941 case SRE_OP_MIN_UNTIL:
942 /* minimizing repeat */
943 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
944
945 rp = state->repeat;
946 if (!rp)
947 return SRE_ERROR_STATE;
948
949 count = rp->count + 1;
950
951 TRACE(("%8d: min until %d\n", PTR(ptr), count));
952
953 state->ptr = ptr;
954
955 if (count < rp->pattern[1]) {
956 /* not enough matches */
957 TRACE(("%8d: match item (required)\n", PTR(ptr)));
958 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000959 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000960 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000961 if (i)
962 return i;
963 rp->count = count-1;
964 state->ptr = ptr;
965 return 0;
966 }
967
968 /* see if the tail matches */
969 TRACE(("%8d: match tail\n", PTR(ptr)));
970 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000971 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000972 if (i) {
973 /* free(rp); */
974 return i;
975 }
976 state->repeat = rp;
977
978 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
979 return 0;
980
981 TRACE(("%8d: match item (optional)\n", PTR(ptr)));
982 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000983 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000984 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000985 if (i)
986 return i;
987 rp->count = count - 1;
988 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000989
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000990 default:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000991 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
992 return SRE_ERROR_ILLEGAL;
993 }
994 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000995
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000996 /* shouldn't end up here */
997 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000998}
999
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001000LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001001SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1002{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001003 SRE_CHAR* ptr = state->start;
1004 SRE_CHAR* end = state->end;
1005 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001006 int prefix_len = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001007 SRE_CODE* prefix = NULL;
1008 SRE_CODE* charset = NULL;
1009 SRE_CODE* overlap = NULL;
1010 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001011
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001012 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001013 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001014 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001015
1016 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001017
1018 if (pattern[3] > 0) {
1019 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001020 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001021 end -= pattern[3]-1;
1022 if (end <= ptr)
1023 end = ptr+1;
1024 }
1025
Fredrik Lundh3562f112000-07-02 12:00:07 +00001026 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001027 /* pattern starts with a known prefix */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001028 prefix_len = pattern[5];
1029 prefix = pattern + 6;
1030 overlap = prefix + prefix_len - 1;
1031 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001032 /* pattern starts with a character from a known set */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001033 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001034
1035 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001036 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001037
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001038#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001039 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001040 /* pattern starts with a known prefix. use the overlap
1041 table to skip forward as fast as we possibly can */
1042 int i = 0;
1043 end = state->end;
1044 while (ptr < end) {
1045 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001046 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001047 if (!i)
1048 break;
1049 else
1050 i = overlap[i];
1051 } else {
1052 if (++i == prefix_len) {
1053 /* found a potential match */
1054 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
1055 state->start = ptr - prefix_len + 1;
1056 state->ptr = ptr + 1;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001057 if (flags & SRE_INFO_LITERAL)
1058 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001059 status = SRE_MATCH(state, pattern + 2*prefix_len, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001060 if (status != 0)
1061 return status;
1062 /* close but no cigar -- try again */
1063 i = overlap[i];
1064 }
1065 break;
1066 }
1067
1068 }
1069 ptr++;
1070 }
1071 return 0;
1072 }
1073#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001074
Fredrik Lundh3562f112000-07-02 12:00:07 +00001075 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001076 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001077 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001078 SRE_CODE chr = pattern[1];
1079 for (;;) {
1080 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1081 ptr++;
1082 if (ptr == end)
1083 return 0;
1084 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
1085 state->start = ptr;
1086 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001087 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001088 if (status != 0)
1089 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001090 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001091 } else if (charset) {
1092 /* pattern starts with a character from a known set */
1093 for (;;) {
1094 while (ptr < end && !SRE_MEMBER(charset, ptr[0]))
1095 ptr++;
1096 if (ptr == end)
1097 return 0;
1098 TRACE(("%8d: === SEARCH === charset\n", PTR(ptr)));
1099 state->start = ptr;
1100 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001101 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001102 if (status != 0)
1103 break;
1104 }
1105 } else
1106 /* general case */
1107 while (ptr <= end) {
1108 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
1109 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001110 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001111 if (status != 0)
1112 break;
1113 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001114
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001115 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001116}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001117
Guido van Rossumb700df92000-03-31 14:59:30 +00001118
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001119#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001120
1121/* -------------------------------------------------------------------- */
1122/* factories and destructors */
1123
1124/* see sre.h for object declarations */
1125
1126staticforward PyTypeObject Pattern_Type;
1127staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001128staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001129
1130static PyObject *
1131_compile(PyObject* self_, PyObject* args)
1132{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001133 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001134
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001135 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001136 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001137
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001138 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001139 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001140 PyObject* code;
1141 int groups = 0;
1142 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001143 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001144 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001145 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001146 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001147
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001148 code = PySequence_Fast(code, "code argument must be a sequence");
1149 if (!code)
1150 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001151
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001152#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001153 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001154#else
1155 n = PySequence_Length(code);
1156#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001157
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001158 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1159 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001160 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001161 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001162 }
1163
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001164 for (i = 0; i < n; i++) {
1165 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001166 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001167 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001168
1169 Py_DECREF(code);
1170
1171 if (PyErr_Occurred())
1172 return NULL;
1173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001174 Py_INCREF(pattern);
1175 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001176
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001177 self->flags = flags;
1178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001179 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001180
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001181 Py_XINCREF(groupindex);
1182 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001183
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001184 Py_XINCREF(indexgroup);
1185 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001186
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001187 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001188}
1189
1190static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001191sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001192{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001193 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001194}
1195
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001196static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001197sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001198{
1199 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001200 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001201 return NULL;
1202 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001203 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001204#if defined(HAVE_UNICODE)
1205 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001206 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001207#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001208 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001209}
1210
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001211LOCAL(void)
1212state_reset(SRE_STATE* state)
1213{
1214 int i;
1215
1216 state->lastmark = 0;
1217
1218 /* FIXME: dynamic! */
1219 for (i = 0; i < SRE_MARK_SIZE; i++)
1220 state->mark[i] = NULL;
1221
1222 state->lastindex = -1;
1223
1224 state->repeat = NULL;
1225
1226 mark_fini(state);
1227}
1228
Guido van Rossumb700df92000-03-31 14:59:30 +00001229LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001230state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1231 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001232{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001233 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001234
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001235 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001236 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001237 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001238
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001239 memset(state, 0, sizeof(SRE_STATE));
1240
1241 state->lastindex = -1;
1242
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001243 /* get pointer to string buffer */
1244 buffer = string->ob_type->tp_as_buffer;
1245 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1246 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001247 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001248 return NULL;
1249 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001250
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001251 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001252 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1253 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001254 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1255 return NULL;
1256 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001257
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001258 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001259
1260#if PY_VERSION_HEX >= 0x01060000
1261 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001262#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001263 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001264#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001265
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001266 if (PyString_Check(string) || bytes == size)
1267 state->charsize = 1;
1268#if defined(HAVE_UNICODE)
1269 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1270 state->charsize = sizeof(Py_UNICODE);
1271#endif
1272 else {
1273 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1274 return NULL;
1275 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001276
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001277 /* adjust boundaries */
1278 if (start < 0)
1279 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001280 else if (start > size)
1281 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001282
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001283 if (end < 0)
1284 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001285 else if (end > size)
1286 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001287
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001288 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001289
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001290 state->start = (void*) ((char*) ptr + start * state->charsize);
1291 state->end = (void*) ((char*) ptr + end * state->charsize);
1292
1293 Py_INCREF(string);
1294 state->string = string;
1295 state->pos = start;
1296 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001297
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001298 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001299 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001300#if defined(HAVE_UNICODE)
1301 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001302 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001303#endif
1304 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001305 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001306
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001307 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001308}
1309
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001310LOCAL(void)
1311state_fini(SRE_STATE* state)
1312{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001313 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001314 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001315}
1316
1317LOCAL(PyObject*)
1318state_getslice(SRE_STATE* state, int index, PyObject* string)
1319{
1320 index = (index - 1) * 2;
1321
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001322 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1323 Py_INCREF(Py_None);
1324 return Py_None;
1325 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001326
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001327 return PySequence_GetSlice(
1328 string,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001329 ((char*)state->mark[index] - (char*)state->beginning) /
1330 state->charsize,
1331 ((char*)state->mark[index+1] - (char*)state->beginning) /
1332 state->charsize
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001333 );
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001334}
1335
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001336static void
1337pattern_error(int status)
1338{
1339 switch (status) {
1340 case SRE_ERROR_RECURSION_LIMIT:
1341 PyErr_SetString(
1342 PyExc_RuntimeError,
1343 "maximum recursion limit exceeded"
1344 );
1345 break;
1346 case SRE_ERROR_MEMORY:
1347 PyErr_NoMemory();
1348 break;
1349 default:
1350 /* other error codes indicate compiler/engine bugs */
1351 PyErr_SetString(
1352 PyExc_RuntimeError,
1353 "internal error in regular expression engine"
1354 );
1355 }
1356}
1357
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001358static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001359pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001360{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001361 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001362
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001363 MatchObject* match;
1364 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001365 char* base;
1366 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001367
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001368 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001369
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001370 /* create match object (with room for extra group marks) */
1371 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001372 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001373 if (!match)
1374 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001375
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001376 Py_INCREF(pattern);
1377 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001378
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001379 Py_INCREF(state->string);
1380 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001382 match->regs = NULL;
1383 match->groups = pattern->groups+1;
1384
1385 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001386
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001387 base = (char*) state->beginning;
1388 n = state->charsize;
1389
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001390 match->mark[0] = ((char*) state->start - base) / n;
1391 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001393 for (i = j = 0; i < pattern->groups; i++, j+=2)
1394 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1395 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1396 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1397 } else
1398 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1399
1400 match->pos = state->pos;
1401 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001402
Fredrik Lundh6f013982000-07-03 18:44:21 +00001403 match->lastindex = state->lastindex;
1404
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001405 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001406
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001407 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001408
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001409 /* no match */
1410 Py_INCREF(Py_None);
1411 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001413 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001414
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001415 /* internal error */
1416 pattern_error(status);
1417 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001418}
1419
1420static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001421pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001422{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001423 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001424
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001425 ScannerObject* self;
1426
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001427 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001428 int start = 0;
1429 int end = INT_MAX;
1430 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1431 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001433 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001434 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001435 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001436 return NULL;
1437
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001438 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001439 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001440 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001441 return NULL;
1442 }
1443
1444 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001445 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001446
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001447 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001448}
1449
Guido van Rossumb700df92000-03-31 14:59:30 +00001450static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001451pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001452{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001453 Py_XDECREF(self->pattern);
1454 Py_XDECREF(self->groupindex);
1455 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001456}
1457
1458static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001459pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001460{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001461 SRE_STATE state;
1462 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001463
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001464 PyObject* string;
1465 int start = 0;
1466 int end = INT_MAX;
1467 if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
1468 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001469
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001470 string = state_init(&state, self, string, start, end);
1471 if (!string)
1472 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001473
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001474 state.ptr = state.start;
1475
1476 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001477 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001478 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001479#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001480 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001481#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001482 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001483
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001484 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001485
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001486 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001487}
1488
1489static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001490pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001491{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001492 SRE_STATE state;
1493 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001494
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001495 PyObject* string;
1496 int start = 0;
1497 int end = INT_MAX;
1498 if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
1499 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001500
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001501 string = state_init(&state, self, string, start, end);
1502 if (!string)
1503 return NULL;
1504
1505 if (state.charsize == 1) {
1506 status = sre_search(&state, PatternObject_GetCode(self));
1507 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001508#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001509 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001510#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001511 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001512
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001513 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001514
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001515 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001516}
1517
1518static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001519call(char* function, PyObject* args)
1520{
1521 PyObject* name;
1522 PyObject* module;
1523 PyObject* func;
1524 PyObject* result;
1525
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001526 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001527 if (!name)
1528 return NULL;
1529 module = PyImport_Import(name);
1530 Py_DECREF(name);
1531 if (!module)
1532 return NULL;
1533 func = PyObject_GetAttrString(module, function);
1534 Py_DECREF(module);
1535 if (!func)
1536 return NULL;
1537 result = PyObject_CallObject(func, args);
1538 Py_DECREF(func);
1539 Py_DECREF(args);
1540 return result;
1541}
1542
1543static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001544pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001545{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001546 PyObject* template;
1547 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001548 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001549 if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
1550 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001551
1552 /* delegate to Python code */
1553 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1554}
1555
1556static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001557pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001558{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001559 PyObject* template;
1560 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001561 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001562 if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
1563 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001564
1565 /* delegate to Python code */
1566 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1567}
1568
1569static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001570pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001571{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001573 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001574 if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
1575 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001576
1577 /* delegate to Python code */
1578 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1579}
1580
1581static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001582pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001583{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001584 SRE_STATE state;
1585 PyObject* list;
1586 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001587 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001588
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001589 PyObject* string;
1590 int start = 0;
1591 int end = INT_MAX;
1592 if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
1593 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001594
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001595 string = state_init(&state, self, string, start, end);
1596 if (!string)
1597 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001598
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001599 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001601 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001602
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001603 PyObject* item;
1604
1605 state.ptr = state.start;
1606
1607 if (state.charsize == 1) {
1608 status = sre_search(&state, PatternObject_GetCode(self));
1609 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001610#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001611 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001612#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001613 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001614
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001615 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001616
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001617 /* don't bother to build a match object */
1618 switch (self->groups) {
1619 case 0:
1620 item = PySequence_GetSlice(
1621 string,
1622 ((char*) state.start - (char*) state.beginning) /
1623 state.charsize,
1624 ((char*) state.ptr - (char*) state.beginning) /
1625 state.charsize);
1626 if (!item)
1627 goto error;
1628 break;
1629 case 1:
1630 item = state_getslice(&state, 1, string);
1631 if (!item)
1632 goto error;
1633 break;
1634 default:
1635 item = PyTuple_New(self->groups);
1636 if (!item)
1637 goto error;
1638 for (i = 0; i < self->groups; i++) {
1639 PyObject* o = state_getslice(&state, i+1, string);
1640 if (!o) {
1641 Py_DECREF(item);
1642 goto error;
1643 }
1644 PyTuple_SET_ITEM(item, i, o);
1645 }
1646 break;
1647 }
1648
1649 if (PyList_Append(list, item) < 0) {
1650 Py_DECREF(item);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001651 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001652 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001653
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001654 if (state.ptr == state.start)
1655 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001656 else
1657 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001658
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001659 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001660
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001661 if (status == 0)
1662 break;
1663
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001664 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001665 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001666
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001667 }
1668 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001669
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001670 state_fini(&state);
1671 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001672
1673error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001674 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001675 state_fini(&state);
1676 return NULL;
1677
Guido van Rossumb700df92000-03-31 14:59:30 +00001678}
1679
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001680static PyMethodDef pattern_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001681 {"match", (PyCFunction) pattern_match, 1},
1682 {"search", (PyCFunction) pattern_search, 1},
1683 {"sub", (PyCFunction) pattern_sub, 1},
1684 {"subn", (PyCFunction) pattern_subn, 1},
1685 {"split", (PyCFunction) pattern_split, 1},
1686 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001687 /* experimental */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001688 {"scanner", (PyCFunction) pattern_scanner, 1},
1689 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001690};
1691
1692static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001693pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001694{
1695 PyObject* res;
1696
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001697 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001698
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001699 if (res)
1700 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001701
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001703
1704 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001705 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001706 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001707 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001708 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001709
1710 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001711 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001712
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001713 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001714 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001715
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001716 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001717 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001718 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001719 }
1720
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001721 PyErr_SetString(PyExc_AttributeError, name);
1722 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001723}
1724
1725statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001726 PyObject_HEAD_INIT(NULL)
1727 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001728 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001729 (destructor)pattern_dealloc, /*tp_dealloc*/
1730 0, /*tp_print*/
1731 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001732};
1733
1734/* -------------------------------------------------------------------- */
1735/* match methods */
1736
1737static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001738match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001739{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001740 Py_XDECREF(self->regs);
1741 Py_XDECREF(self->string);
1742 Py_DECREF(self->pattern);
1743 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001744}
1745
1746static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001747match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001748{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001749 if (index < 0 || index >= self->groups) {
1750 /* raise IndexError if we were given a bad group number */
1751 PyErr_SetString(
1752 PyExc_IndexError,
1753 "no such group"
1754 );
1755 return NULL;
1756 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001757
Fredrik Lundh6f013982000-07-03 18:44:21 +00001758 index *= 2;
1759
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001760 if (self->string == Py_None || self->mark[index] < 0) {
1761 /* return default value if the string or group is undefined */
1762 Py_INCREF(def);
1763 return def;
1764 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001765
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001766 return PySequence_GetSlice(
1767 self->string, self->mark[index], self->mark[index+1]
1768 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001769}
1770
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001771static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001772match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001773{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001774 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001775
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001776 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001777 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001778
Fredrik Lundh6f013982000-07-03 18:44:21 +00001779 i = -1;
1780
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001781 if (self->pattern->groupindex) {
1782 index = PyObject_GetItem(self->pattern->groupindex, index);
1783 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001784 if (PyInt_Check(index))
1785 i = (int) PyInt_AS_LONG(index);
1786 Py_DECREF(index);
1787 } else
1788 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001789 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001790
1791 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001792}
1793
1794static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001795match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001796{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001797 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001798}
1799
1800static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001801match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001802{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001803 PyObject* result;
1804 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001805
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001806 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001807
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001808 switch (size) {
1809 case 0:
1810 result = match_getslice(self, Py_False, Py_None);
1811 break;
1812 case 1:
1813 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1814 break;
1815 default:
1816 /* fetch multiple items */
1817 result = PyTuple_New(size);
1818 if (!result)
1819 return NULL;
1820 for (i = 0; i < size; i++) {
1821 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001822 self, PyTuple_GET_ITEM(args, i), Py_None
1823 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001824 if (!item) {
1825 Py_DECREF(result);
1826 return NULL;
1827 }
1828 PyTuple_SET_ITEM(result, i, item);
1829 }
1830 break;
1831 }
1832 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001833}
1834
1835static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001836match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001837{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001838 PyObject* result;
1839 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001840
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001841 PyObject* def = Py_None;
1842 if (!PyArg_ParseTuple(args, "|O:groups", &def))
1843 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001845 result = PyTuple_New(self->groups-1);
1846 if (!result)
1847 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001848
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001849 for (index = 1; index < self->groups; index++) {
1850 PyObject* item;
1851 item = match_getslice_by_index(self, index, def);
1852 if (!item) {
1853 Py_DECREF(result);
1854 return NULL;
1855 }
1856 PyTuple_SET_ITEM(result, index-1, item);
1857 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001858
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001859 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001860}
1861
1862static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001863match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001864{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001865 PyObject* result;
1866 PyObject* keys;
1867 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001868
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001869 PyObject* def = Py_None;
1870 if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
1871 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001872
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001873 result = PyDict_New();
1874 if (!result || !self->pattern->groupindex)
1875 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001876
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001877 keys = PyMapping_Keys(self->pattern->groupindex);
1878 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001879 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001880 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001881 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001882
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001883 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1884 PyObject* key;
1885 PyObject* item;
1886 key = PyList_GET_ITEM(keys, index);
1887 if (!key) {
1888 Py_DECREF(keys);
1889 Py_DECREF(result);
1890 return NULL;
1891 }
1892 item = match_getslice(self, key, def);
1893 if (!item) {
1894 Py_DECREF(key);
1895 Py_DECREF(keys);
1896 Py_DECREF(result);
1897 return NULL;
1898 }
1899 /* FIXME: <fl> this can fail, right? */
1900 PyDict_SetItem(result, key, item);
1901 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001902
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001903 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001904
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001905 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001906}
1907
1908static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001909match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001910{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001911 int index;
1912
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001913 PyObject* index_ = Py_False; /* zero */
1914 if (!PyArg_ParseTuple(args, "|O:start", &index_))
1915 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001916
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001917 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001918
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001919 if (index < 0 || index >= self->groups) {
1920 PyErr_SetString(
1921 PyExc_IndexError,
1922 "no such group"
1923 );
1924 return NULL;
1925 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001926
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001927 if (self->mark[index*2] < 0) {
1928 Py_INCREF(Py_None);
1929 return Py_None;
1930 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001931
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001932 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00001933}
1934
1935static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001936match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001937{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001938 int index;
1939
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001940 PyObject* index_ = Py_False; /* zero */
1941 if (!PyArg_ParseTuple(args, "|O:end", &index_))
1942 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001943
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001944 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001945
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001946 if (index < 0 || index >= self->groups) {
1947 PyErr_SetString(
1948 PyExc_IndexError,
1949 "no such group"
1950 );
1951 return NULL;
1952 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001953
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001954 if (self->mark[index*2] < 0) {
1955 Py_INCREF(Py_None);
1956 return Py_None;
1957 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001958
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001959 return Py_BuildValue("i", self->mark[index*2+1]);
1960}
1961
1962LOCAL(PyObject*)
1963_pair(int i1, int i2)
1964{
1965 PyObject* pair;
1966 PyObject* item;
1967
1968 pair = PyTuple_New(2);
1969 if (!pair)
1970 return NULL;
1971
1972 item = PyInt_FromLong(i1);
1973 if (!item)
1974 goto error;
1975 PyTuple_SET_ITEM(pair, 0, item);
1976
1977 item = PyInt_FromLong(i2);
1978 if (!item)
1979 goto error;
1980 PyTuple_SET_ITEM(pair, 1, item);
1981
1982 return pair;
1983
1984 error:
1985 Py_DECREF(pair);
1986 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001987}
1988
1989static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001990match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001991{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001992 int index;
1993
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001994 PyObject* index_ = Py_False; /* zero */
1995 if (!PyArg_ParseTuple(args, "|O:span", &index_))
1996 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001997
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001998 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001999
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002000 if (index < 0 || index >= self->groups) {
2001 PyErr_SetString(
2002 PyExc_IndexError,
2003 "no such group"
2004 );
2005 return NULL;
2006 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002007
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002008 if (self->mark[index*2] < 0) {
2009 Py_INCREF(Py_None);
2010 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002011 return Py_BuildValue("OO", Py_None, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002012 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002013
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002014 return _pair(self->mark[index*2], self->mark[index*2+1]);
2015}
2016
2017static PyObject*
2018match_regs(MatchObject* self)
2019{
2020 PyObject* regs;
2021 PyObject* item;
2022 int index;
2023
2024 regs = PyTuple_New(self->groups);
2025 if (!regs)
2026 return NULL;
2027
2028 for (index = 0; index < self->groups; index++) {
2029 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2030 if (!item) {
2031 Py_DECREF(regs);
2032 return NULL;
2033 }
2034 PyTuple_SET_ITEM(regs, index, item);
2035 }
2036
2037 Py_INCREF(regs);
2038 self->regs = regs;
2039
2040 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002041}
2042
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002043static PyMethodDef match_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002044 {"group", (PyCFunction) match_group, 1},
2045 {"start", (PyCFunction) match_start, 1},
2046 {"end", (PyCFunction) match_end, 1},
2047 {"span", (PyCFunction) match_span, 1},
2048 {"groups", (PyCFunction) match_groups, 1},
2049 {"groupdict", (PyCFunction) match_groupdict, 1},
2050 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002051};
2052
2053static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002054match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002055{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002056 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002057
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2059 if (res)
2060 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002062 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002063
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002065 if (self->lastindex >= 0)
2066 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002067 Py_INCREF(Py_None);
2068 return Py_None;
2069 }
2070
2071 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002072 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002073 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002074 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002075 );
2076 if (result)
2077 return result;
2078 PyErr_Clear();
2079 }
2080 Py_INCREF(Py_None);
2081 return Py_None;
2082 }
2083
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002084 if (!strcmp(name, "string")) {
2085 if (self->string) {
2086 Py_INCREF(self->string);
2087 return self->string;
2088 } else {
2089 Py_INCREF(Py_None);
2090 return Py_None;
2091 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002092 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002093
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002094 if (!strcmp(name, "regs")) {
2095 if (self->regs) {
2096 Py_INCREF(self->regs);
2097 return self->regs;
2098 } else
2099 return match_regs(self);
2100 }
2101
2102 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002103 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002104 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002105 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002106
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002107 if (!strcmp(name, "pos"))
2108 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002109
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002110 if (!strcmp(name, "endpos"))
2111 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002112
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002113 PyErr_SetString(PyExc_AttributeError, name);
2114 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002115}
2116
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2119
2120statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 PyObject_HEAD_INIT(NULL)
2122 0, "SRE_Match",
2123 sizeof(MatchObject), sizeof(int),
2124 (destructor)match_dealloc, /*tp_dealloc*/
2125 0, /*tp_print*/
2126 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002127};
2128
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002129/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002130/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002131
2132static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002133scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002134{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002135 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002136 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002137 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002138}
2139
2140static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002141scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002142{
2143 SRE_STATE* state = &self->state;
2144 PyObject* match;
2145 int status;
2146
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002147 state_reset(state);
2148
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002149 state->ptr = state->start;
2150
2151 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002152 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002153 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002154#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002155 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002156#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002157 }
2158
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002159 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002160 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002161
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002162 if (status == 0 || state->ptr == state->start)
2163 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002164 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002165 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002166
2167 return match;
2168}
2169
2170
2171static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002172scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002173{
2174 SRE_STATE* state = &self->state;
2175 PyObject* match;
2176 int status;
2177
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002178 state_reset(state);
2179
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002180 state->ptr = state->start;
2181
2182 if (state->charsize == 1) {
2183 status = sre_search(state, PatternObject_GetCode(self->pattern));
2184 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002185#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002186 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002187#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002188 }
2189
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002190 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002191 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002192
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002193 if (status == 0 || state->ptr == state->start)
2194 state->start = (void*) ((char*) state->ptr + state->charsize);
2195 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002196 state->start = state->ptr;
2197
2198 return match;
2199}
2200
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002201static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002202 {"match", (PyCFunction) scanner_match, 0},
2203 {"search", (PyCFunction) scanner_search, 0},
2204 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002205};
2206
2207static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002208scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002209{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002210 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002211
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002212 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2213 if (res)
2214 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002215
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002216 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002217
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002218 /* attributes */
2219 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002220 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002221 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002222 }
2223
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002224 PyErr_SetString(PyExc_AttributeError, name);
2225 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002226}
2227
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002228statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002229 PyObject_HEAD_INIT(NULL)
2230 0, "SRE_Scanner",
2231 sizeof(ScannerObject), 0,
2232 (destructor)scanner_dealloc, /*tp_dealloc*/
2233 0, /*tp_print*/
2234 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002235};
2236
Guido van Rossumb700df92000-03-31 14:59:30 +00002237static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002238 {"compile", _compile, 1},
2239 {"getcodesize", sre_codesize, 1},
2240 {"getlower", sre_getlower, 1},
2241 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002242};
2243
2244void
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002245#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002246__declspec(dllexport)
2247#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002248init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002249{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002250 /* Patch object types */
2251 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002252 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002253
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002254 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002255}
2256
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002257#endif /* !defined(SRE_RECURSIVE) */