blob: 69bc17114e2cf3d33bb1da1fd9466a442e4ca348 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00007 * 99-10-24 fl created (based on existing template matcher code)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00008 * 00-03-06 fl first alpha, sort of (0.5)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00009 * 00-06-30 fl added fast search optimization (0.9.3)
10 * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
11 * 00-07-02 fl added charset optimizations, etc (0.9.5)
12 * 00-07-03 fl store code in pattern object, lookbehind, etc
13 * 00-07-08 fl added regs attribute
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000014 * 00-07-21 fl reset lastindex in scanner methods (0.9.6)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000015 * 00-08-01 fl fixes for 1.6b1 (0.9.8)
Guido van Rossumb700df92000-03-31 14:59:30 +000016 *
17 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
18 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000019 * This version of the SRE library can be redistributed under CNRI's
20 * Python 1.6 license. For any other use, please contact Secret Labs
21 * AB (info@pythonware.com).
22 *
Guido van Rossumb700df92000-03-31 14:59:30 +000023 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000024 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000025 * other compatibility work.
26 */
27
28#ifndef SRE_RECURSIVE
29
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000030char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000031
32#include "Python.h"
33
34#include "sre.h"
35
Guido van Rossumb700df92000-03-31 14:59:30 +000036#if defined(HAVE_LIMITS_H)
37#include <limits.h>
38#else
39#define INT_MAX 2147483647
40#endif
41
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000042#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000043
Fredrik Lundh436c3d582000-06-29 08:58:44 +000044/* name of this module, minus the leading underscore */
45#define MODULE "sre"
46
Guido van Rossumb700df92000-03-31 14:59:30 +000047/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000048#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000049
Fredrik Lundh436c3d582000-06-29 08:58:44 +000050#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000051/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052#define HAVE_UNICODE
53#endif
54
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000055/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000056/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000057
58/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000059#define USE_FAST_SEARCH
60
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000061/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000062#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
64/* -------------------------------------------------------------------- */
65
Fredrik Lundh80946112000-06-29 18:03:25 +000066#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000067#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000068#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000069/* fastest possible local call under MSVC */
70#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000071#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000072#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000073#else
74#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000075#endif
76
77/* error codes */
78#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000079#define SRE_ERROR_STATE -2 /* illegal state */
Guido van Rossumb700df92000-03-31 14:59:30 +000080#define SRE_ERROR_MEMORY -9 /* out of memory */
81
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000082#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000083#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000084#else
85#define TRACE(v)
86#endif
87
Fredrik Lundh436c3d582000-06-29 08:58:44 +000088#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Guido van Rossumb700df92000-03-31 14:59:30 +000089
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000090/* -------------------------------------------------------------------- */
91/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000092
Fredrik Lundh436c3d582000-06-29 08:58:44 +000093/* default character predicates (run sre_chars.py to regenerate tables) */
94
95#define SRE_DIGIT_MASK 1
96#define SRE_SPACE_MASK 2
97#define SRE_LINEBREAK_MASK 4
98#define SRE_ALNUM_MASK 8
99#define SRE_WORD_MASK 16
100
/* flag byte for each 7-bit ASCII code: the bitwise OR of the SRE_*_MASK
   values that apply to that character.  values used below:
     25 = DIGIT|ALNUM|WORD   24 = ALNUM|WORD   16 = WORD
      6 = SPACE|LINEBREAK     2 = SPACE */
static char sre_char_info[128] = {
    /*   0 */  0,  0,  0,  0,  0,  0,  0,  0,
    /*   8 */  0,  2,  6,  2,  2,  2,  0,  0,
    /*  16 */  0,  0,  0,  0,  0,  0,  0,  0,
    /*  24 */  0,  0,  0,  0,  0,  0,  0,  0,
    /*  32 */  2,  0,  0,  0,  0,  0,  0,  0,
    /*  40 */  0,  0,  0,  0,  0,  0,  0,  0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25,
    /*  56 */ 25, 25,  0,  0,  0,  0,  0,  0,
    /*  64 */  0, 24, 24, 24, 24, 24, 24, 24,
    /*  72 */ 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24,
    /*  88 */ 24, 24, 24,  0,  0,  0,  0, 16,
    /*  96 */  0, 24, 24, 24, 24, 24, 24, 24,
    /* 104 */ 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24,
    /* 120 */ 24, 24, 24,  0,  0,  0,  0,  0
};
108
/* lower-case mapping for the 7-bit ASCII range: 'A'..'Z' (65..90) map
   to 'a'..'z' (97..122), every other code maps to itself */
static char sre_char_lower[128] = {
    /*   0 */   0,   1,   2,   3,   4,   5,   6,   7,
    /*   8 */   8,   9,  10,  11,  12,  13,  14,  15,
    /*  16 */  16,  17,  18,  19,  20,  21,  22,  23,
    /*  24 */  24,  25,  26,  27,  28,  29,  30,  31,
    /*  32 */  32,  33,  34,  35,  36,  37,  38,  39,
    /*  40 */  40,  41,  42,  43,  44,  45,  46,  47,
    /*  48 */  48,  49,  50,  51,  52,  53,  54,  55,
    /*  56 */  56,  57,  58,  59,  60,  61,  62,  63,
    /*  64 */  64,  97,  98,  99, 100, 101, 102, 103,
    /*  72 */ 104, 105, 106, 107, 108, 109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119,
    /*  88 */ 120, 121, 122,  91,  92,  93,  94,  95,
    /*  96 */  96,  97,  98,  99, 100, 101, 102, 103,
    /* 104 */ 104, 105, 106, 107, 108, 109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119,
    /* 120 */ 120, 121, 122, 123, 124, 125, 126, 127
};

/* default (locale-independent) lower-casing: ASCII codes go through
   the table above, codes of 128 and up are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return sre_char_lower[ch];
}
123
/* look up one flag of a character in sre_char_info.  codes of 128 and
   up never have a flag set.  the result is the masked bit itself (zero
   or the mask value), not a normalized 0/1, and the argument is
   evaluated more than once */
#define SRE_CHAR_FLAG(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

#define SRE_IS_DIGIT(ch)     SRE_CHAR_FLAG((ch), SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch)     SRE_CHAR_FLAG((ch), SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_FLAG((ch), SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch)     SRE_CHAR_FLAG((ch), SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch)      SRE_CHAR_FLAG((ch), SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000134
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000135/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000136
/* locale-aware lower-casing: codes below 256 are passed to the C
   library's tolower(), anything larger is returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return tolower(ch);
}
/* apply a <ctype.h> classifier to codes below 256; larger codes never
   match.  the argument is evaluated more than once */
#define SRE_LOC_TEST(classify, ch) ((ch) < 256 ? classify((ch)) : 0)

#define SRE_LOC_IS_DIGIT(ch) SRE_LOC_TEST(isdigit, (ch))
#define SRE_LOC_IS_SPACE(ch) SRE_LOC_TEST(isspace, (ch))
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) SRE_LOC_TEST(isalnum, (ch))
/* word characters are the locale's alphanumerics plus underscore */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
146
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000147/* unicode-specific character predicates */
148
149#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000150static unsigned int sre_lower_unicode(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000151{
152 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
153}
/* convert a character code to the type expected by the Py_UNICODE_*
   classification macros */
#define SRE_UNI_CHAR(ch) ((Py_UNICODE)(ch))

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT(SRE_UNI_CHAR(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(SRE_UNI_CHAR(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(SRE_UNI_CHAR(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(SRE_UNI_CHAR(ch))
/* word characters are the unicode alphanumerics plus underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000159#endif
160
Guido van Rossumb700df92000-03-31 14:59:30 +0000161LOCAL(int)
162sre_category(SRE_CODE category, unsigned int ch)
163{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000164 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000165
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000166 case SRE_CATEGORY_DIGIT:
167 return SRE_IS_DIGIT(ch);
168 case SRE_CATEGORY_NOT_DIGIT:
169 return !SRE_IS_DIGIT(ch);
170 case SRE_CATEGORY_SPACE:
171 return SRE_IS_SPACE(ch);
172 case SRE_CATEGORY_NOT_SPACE:
173 return !SRE_IS_SPACE(ch);
174 case SRE_CATEGORY_WORD:
175 return SRE_IS_WORD(ch);
176 case SRE_CATEGORY_NOT_WORD:
177 return !SRE_IS_WORD(ch);
178 case SRE_CATEGORY_LINEBREAK:
179 return SRE_IS_LINEBREAK(ch);
180 case SRE_CATEGORY_NOT_LINEBREAK:
181 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000182
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000183 case SRE_CATEGORY_LOC_WORD:
184 return SRE_LOC_IS_WORD(ch);
185 case SRE_CATEGORY_LOC_NOT_WORD:
186 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000187
188#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000189 case SRE_CATEGORY_UNI_DIGIT:
190 return SRE_UNI_IS_DIGIT(ch);
191 case SRE_CATEGORY_UNI_NOT_DIGIT:
192 return !SRE_UNI_IS_DIGIT(ch);
193 case SRE_CATEGORY_UNI_SPACE:
194 return SRE_UNI_IS_SPACE(ch);
195 case SRE_CATEGORY_UNI_NOT_SPACE:
196 return !SRE_UNI_IS_SPACE(ch);
197 case SRE_CATEGORY_UNI_WORD:
198 return SRE_UNI_IS_WORD(ch);
199 case SRE_CATEGORY_UNI_NOT_WORD:
200 return !SRE_UNI_IS_WORD(ch);
201 case SRE_CATEGORY_UNI_LINEBREAK:
202 return SRE_UNI_IS_LINEBREAK(ch);
203 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
204 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000205#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000206 }
207 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000208}
209
210/* helpers */
211
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000212static void
213mark_init(SRE_STATE* state)
Guido van Rossumb700df92000-03-31 14:59:30 +0000214{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000215 state->mark_stack = NULL;
216 state->mark_stack_size = state->mark_stack_base = 0;
217}
218
219static void
220mark_fini(SRE_STATE* state)
221{
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000222#if 0
223 /* FIXME: debugging */
224 if (state->maxlevel > 0)
225 printf("max %d\n", state->maxlevel);
226 if (state->mark_stack_base > 0)
227 printf("mark stack %d\n", state->mark_stack_base);
228#endif
229
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000230 if (state->mark_stack)
231 free(state->mark_stack);
232 mark_init(state);
233}
234
235static int
236mark_save(SRE_STATE* state, int lo, int hi)
237{
238 void* stack;
239 int size;
240 int minsize, newsize;
241
242 if (hi <= lo)
243 return 0;
244
245 size = (hi - lo) + 1;
246
247 newsize = state->mark_stack_size;
248 minsize = state->mark_stack_base + size;
249
250 if (newsize < minsize) {
251 /* create new stack */
252 if (!newsize) {
253 newsize = 512;
254 if (newsize < minsize)
255 newsize = minsize;
256 TRACE(("allocate stack %d\n", newsize));
257 stack = malloc(sizeof(void*) * newsize);
258 } else {
259 /* grow the stack */
260 while (newsize < minsize)
261 newsize += newsize;
262 TRACE(("grow stack to %d\n", newsize));
263 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
264 }
265 if (!stack) {
266 mark_fini(state);
267 return SRE_ERROR_MEMORY;
268 }
269 state->mark_stack = stack;
270 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000271 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000272
273 TRACE(("copy %d:%d to %d\n", lo, hi, state->mark_stack_base));
274
275 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
276 size * sizeof(void*));
277
278 state->mark_stack_base += size;
279
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000280 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000281}
282
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000283static int
284mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000285{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000286 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000287
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000288 if (hi <= lo)
289 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000290
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000291 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000292
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000293 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000294
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000295 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000296
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000297 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
298 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000299
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000300 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000301}
302
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000303/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000304
305#define SRE_CHAR unsigned char
306#define SRE_AT sre_at
307#define SRE_MEMBER sre_member
308#define SRE_MATCH sre_match
309#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000310
311#if defined(HAVE_UNICODE)
312
Guido van Rossumb700df92000-03-31 14:59:30 +0000313#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000314#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000315#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000316
Guido van Rossumb700df92000-03-31 14:59:30 +0000317#undef SRE_SEARCH
318#undef SRE_MATCH
319#undef SRE_MEMBER
320#undef SRE_AT
321#undef SRE_CHAR
322
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000323/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000324
325#define SRE_CHAR Py_UNICODE
326#define SRE_AT sre_uat
327#define SRE_MEMBER sre_umember
328#define SRE_MATCH sre_umatch
329#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000330#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000331
332#endif /* SRE_RECURSIVE */
333
334/* -------------------------------------------------------------------- */
335/* String matching engine */
336
337/* the following section is compiled twice, with different character
338 settings */
339
340LOCAL(int)
341SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
342{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000343 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000344
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000345 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000346
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000347 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000348
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000349 case SRE_AT_BEGINNING:
350 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000352 case SRE_AT_BEGINNING_LINE:
353 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000354 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000355
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000356 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000357 return (((void*) (ptr+1) == state->end &&
358 SRE_IS_LINEBREAK((int) ptr[0])) ||
359 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000360
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000361 case SRE_AT_END_LINE:
362 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000363 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000364
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000365 case SRE_AT_BOUNDARY:
366 if (state->beginning == state->end)
367 return 0;
368 that = ((void*) ptr > state->beginning) ?
369 SRE_IS_WORD((int) ptr[-1]) : 0;
370 this = ((void*) ptr < state->end) ?
371 SRE_IS_WORD((int) ptr[0]) : 0;
372 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000374 case SRE_AT_NON_BOUNDARY:
375 if (state->beginning == state->end)
376 return 0;
377 that = ((void*) ptr > state->beginning) ?
378 SRE_IS_WORD((int) ptr[-1]) : 0;
379 this = ((void*) ptr < state->end) ?
380 SRE_IS_WORD((int) ptr[0]) : 0;
381 return this == that;
382 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000383
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000384 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000385}
386
387LOCAL(int)
Fredrik Lundh0640e112000-06-30 13:55:15 +0000388SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000389{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000390 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000391
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000392 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000393
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000394 for (;;) {
395 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000396
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 case SRE_OP_NEGATE:
398 ok = !ok;
399 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000400
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 case SRE_OP_FAILURE:
402 return !ok;
Guido van Rossumb700df92000-03-31 14:59:30 +0000403
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000405 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000406 if (ch == set[0])
407 return ok;
408 set++;
409 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000412 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 if (set[0] <= ch && ch <= set[1])
414 return ok;
415 set += 2;
416 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000417
Fredrik Lundh3562f112000-07-02 12:00:07 +0000418 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000419 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000420 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
421 return ok;
422 set += 16;
423 break;
424
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000425 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000426 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000427 if (sre_category(set[0], (int) ch))
428 return ok;
429 set += 1;
430 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000432 default:
433 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000434 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000435 return 0;
436 }
437 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000438}
439
440LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000441SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000442{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000443 /* check if string matches the given pattern. returns -1 for
444 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000445
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000446 SRE_CHAR* end = state->end;
447 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000448 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000449 SRE_REPEAT* rp;
450 int lastmark;
Guido van Rossumb700df92000-03-31 14:59:30 +0000451
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000452 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000453
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000454 TRACE(("%8d: enter %d\n", PTR(ptr), level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000455
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000456 if (pattern[0] == SRE_OP_INFO) {
457 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000458 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000459 if (pattern[3] && (end - ptr) < pattern[3]) {
460 TRACE(("reject (got %d chars, need %d)\n",
461 (end - ptr), pattern[3]));
462 return 0;
463 }
464 pattern += pattern[1] + 1;
465 }
466
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000467 /* FIXME: debugging */
468 if (level > state->maxlevel)
469 state->maxlevel = level;
470
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000471 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000473 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000474
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000475 case SRE_OP_FAILURE:
476 /* immediate failure */
477 TRACE(("%8d: failure\n", PTR(ptr)));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000478 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000479
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000480 case SRE_OP_SUCCESS:
481 /* end of pattern */
482 TRACE(("%8d: success\n", PTR(ptr)));
483 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000484 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000485
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000486 case SRE_OP_AT:
487 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000488 /* <AT> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000489 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
490 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000491 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000492 pattern++;
493 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000494
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000495 case SRE_OP_CATEGORY:
496 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000497 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000498 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000499 *ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000501 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000502 TRACE(("%8d: category ok\n", PTR(ptr)));
503 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000504 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000505 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000506
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000507 case SRE_OP_LITERAL:
508 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000509 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000510 TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
511 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000512 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000513 pattern++;
514 ptr++;
515 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000516
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000517 case SRE_OP_NOT_LITERAL:
518 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000519 /* <NOT_LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000520 TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
521 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000522 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000523 pattern++;
524 ptr++;
525 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000526
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000527 case SRE_OP_ANY:
528 /* match anything */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000529 /* <ANY> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000530 TRACE(("%8d: anything\n", PTR(ptr)));
531 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000532 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000533 ptr++;
534 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000535
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000536 case SRE_OP_IN:
537 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000538 /* <IN> <skip> <set> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000539 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
540 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000541 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000542 pattern += pattern[0];
543 ptr++;
544 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000545
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000546 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000547 /* match backreference */
548 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
549 i = pattern[0];
550 {
551 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
552 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
553 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000554 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000555 while (p < e) {
556 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000557 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000558 p++; ptr++;
559 }
560 }
561 pattern++;
562 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000563
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000564 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000565 /* match backreference */
566 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
567 i = pattern[0];
568 {
569 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
570 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
571 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000572 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000573 while (p < e) {
574 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000575 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000576 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000577 p++; ptr++;
578 }
579 }
580 pattern++;
581 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000582
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000583 case SRE_OP_LITERAL_IGNORE:
584 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
585 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000586 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000587 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000588 pattern++;
589 ptr++;
590 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000591
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000592 case SRE_OP_NOT_LITERAL_IGNORE:
593 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
594 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000595 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000596 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000597 pattern++;
598 ptr++;
599 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000601 case SRE_OP_IN_IGNORE:
602 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
603 if (ptr >= end
604 || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000605 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 pattern += pattern[0];
607 ptr++;
608 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000609
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000610 case SRE_OP_MARK:
611 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000612 /* <MARK> <gid> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000614 i = pattern[0];
615 if (i & 1)
616 state->lastindex = i/2 + 1;
617 if (i > state->lastmark)
618 state->lastmark = i;
619 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000620 pattern++;
621 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000622
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000623 case SRE_OP_JUMP:
624 case SRE_OP_INFO:
625 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000626 /* <JUMP> <offset> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000627 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
628 pattern += pattern[0];
629 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000630
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000631 case SRE_OP_ASSERT:
632 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000633 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000634 TRACE(("%8d: assert subpattern %d\n", PTR(ptr), pattern[1]));
635 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000636 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000637 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000638 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000639 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000640 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000641 if (pattern[1] > 0 && state->ptr != ptr)
642 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000643 pattern += pattern[0];
644 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000645
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000646 case SRE_OP_ASSERT_NOT:
647 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000648 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000649 TRACE(("%8d: assert not subpattern %d\n", PTR(ptr), pattern[1]));
650 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000651 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000652 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000653 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000654 if (i < 0)
655 return i;
656 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000657 return 0;
658 if (pattern[1] > 0 && state->ptr != ptr)
659 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000660 pattern += pattern[0];
661 break;
662
663 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000664 /* alternation */
665 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000666 TRACE(("%8d: branch\n", PTR(ptr)));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000667 {
668 lastmark = state->lastmark;
669 while (pattern[0]) {
670 TRACE(("%8d: try branch\n", PTR(ptr)));
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000671 if (pattern[1] != SRE_OP_LITERAL ||
672 (ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000673 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000674 i = SRE_MATCH(state, pattern + 1, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000675 if (i)
676 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000677 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000678 while (state->lastmark > lastmark)
679 state->mark[state->lastmark--] = NULL;
680 pattern += pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000681 }
682 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000685 case SRE_OP_REPEAT_ONE:
686 /* match repeated sequence (maximizing regexp) */
687
688 /* this operator only works if the repeated item is
689 exactly one character wide, and we're not already
690 collecting backtracking points. for other cases,
691 use the MAX_REPEAT operator instead */
692
693 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
694
695 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
696 pattern[1], pattern[2]));
697
698 count = 0;
699
700 if (pattern[3] == SRE_OP_ANY) {
701 /* repeated wildcard. skip to the end of the target
702 string, and backtrack from there */
703 /* FIXME: must look for line endings */
704 if (ptr + pattern[1] > end)
705 return 0; /* cannot match */
706 count = pattern[2];
707 if (count > end - ptr)
708 count = end - ptr;
709 ptr += count;
710
711 } else if (pattern[3] == SRE_OP_LITERAL) {
712 /* repeated literal */
713 SRE_CODE chr = pattern[4];
714 while (count < (int) pattern[2]) {
715 if (ptr >= end || (SRE_CODE) ptr[0] != chr)
716 break;
717 ptr++;
718 count++;
719 }
720
721 } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
722 /* repeated literal */
723 SRE_CODE chr = pattern[4];
724 while (count < (int) pattern[2]) {
725 if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
726 break;
727 ptr++;
728 count++;
729 }
730
731 } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
732 /* repeated non-literal */
733 SRE_CODE chr = pattern[4];
734 while (count < (int) pattern[2]) {
735 if (ptr >= end || (SRE_CODE) ptr[0] == chr)
736 break;
737 ptr++;
738 count++;
739 }
740
741 } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
742 /* repeated non-literal */
743 SRE_CODE chr = pattern[4];
744 while (count < (int) pattern[2]) {
745 if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
746 break;
747 ptr++;
748 count++;
749 }
750
751 } else if (pattern[3] == SRE_OP_IN) {
752 /* repeated set */
753 while (count < (int) pattern[2]) {
754 if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
755 break;
756 ptr++;
757 count++;
758 }
759
760 } else {
761 /* repeated single character pattern */
762 state->ptr = ptr;
763 while (count < (int) pattern[2]) {
764 i = SRE_MATCH(state, pattern + 3, level + 1);
765 if (i < 0)
766 return i;
767 if (!i)
768 break;
769 count++;
770 }
771 state->ptr = ptr;
772 ptr += count;
773 }
774
775 /* when we arrive here, count contains the number of
776 matches, and ptr points to the tail of the target
777 string. check if the rest of the pattern matches,
778 and backtrack if not. */
779
780 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
781
782 if (count < (int) pattern[1])
783 return 0;
784
785 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
786 /* tail is empty. we're finished */
787 TRACE(("%8d: tail is empty\n", PTR(ptr)));
788 state->ptr = ptr;
789 return 1;
790
791 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
792 /* tail starts with a literal. skip positions where
793 the rest of the pattern cannot possibly match */
794 SRE_CODE chr = pattern[pattern[0]+1];
795 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
796 for (;;) {
797 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
798 while (count >= (int) pattern[1] &&
799 (ptr >= end || *ptr != chr)) {
800 ptr--;
801 count--;
802 }
803 TRACE(("%8d: check tail\n", PTR(ptr)));
804 if (count < (int) pattern[1])
805 break;
806 state->ptr = ptr;
807 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
808 if (i > 0) {
809 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
810 return 1;
811 }
812 ptr--;
813 count--;
814 }
815
816 } else {
817 /* general case */
818 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
819 while (count >= (int) pattern[1]) {
820 state->ptr = ptr;
821 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
822 if (i < 0)
823 return i;
824 if (i) {
825 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
826 return 1;
827 }
828 ptr--;
829 count--;
830 }
831 }
832 return 0;
833
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000834 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000835 /* create repeat context. all the hard work is done
836 by the UNTIL operator */
837 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
838 TRACE(("%8d: repeat {%d,%d}\n", PTR(ptr),
839 pattern[1], pattern[2]));
840
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000841 rep.count = -1;
842 rep.pattern = pattern;
843
844 /* install new repeat context */
845 rep.prev = state->repeat;
846 state->repeat = &rep;
847
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000848 state->ptr = ptr;
849 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000850
851 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000852
853 return i;
854
855 case SRE_OP_MAX_UNTIL:
856 /* maximizing repeat */
857 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
858
859 /* FIXME: we probably need to deal with zero-width
860 matches in here... */
861
862 rp = state->repeat;
863 if (!rp)
864 return SRE_ERROR_STATE;
865
866 state->ptr = ptr;
867
868 count = rp->count + 1;
869
870 TRACE(("%8d: max until %d\n", PTR(ptr), count));
871
872 if (count < rp->pattern[1]) {
873 /* not enough matches */
874 TRACE(("%8d: match item (required)\n", PTR(ptr)));
875 rp->count = count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000876 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000877 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000878 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000879 rp->count = count - 1;
880 state->ptr = ptr;
881 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000882 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000883
884 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
885 TRACE(("%8d: match item (optional)\n", PTR(ptr)));
886 /* we may have enough matches, but if we can
887 match another item, do so */
888 rp->count = count;
889 lastmark = state->lastmark;
890 mark_save(state, 0, lastmark);
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000891 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000892 if (i)
893 return i;
894 mark_restore(state, 0, lastmark);
895 rp->count = count - 1;
896 state->ptr = ptr;
897 }
898
899 /* cannot match more repeated items here. make sure the
900 tail matches */
901 TRACE(("%8d: match tail\n", PTR(ptr)));
902 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000903 i = SRE_MATCH(state, pattern, level + 1);
904 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000905 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000906 state->repeat = rp;
907 return 0;
908
909 case SRE_OP_MIN_UNTIL:
910 /* minimizing repeat */
911 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
912
913 rp = state->repeat;
914 if (!rp)
915 return SRE_ERROR_STATE;
916
917 count = rp->count + 1;
918
919 TRACE(("%8d: min until %d\n", PTR(ptr), count));
920
921 state->ptr = ptr;
922
923 if (count < rp->pattern[1]) {
924 /* not enough matches */
925 TRACE(("%8d: match item (required)\n", PTR(ptr)));
926 rp->count = count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000927 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000928 if (i)
929 return i;
930 rp->count = count-1;
931 state->ptr = ptr;
932 return 0;
933 }
934
935 /* see if the tail matches */
936 TRACE(("%8d: match tail\n", PTR(ptr)));
937 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000938 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000939 if (i) {
940 /* free(rp); */
941 return i;
942 }
943 state->repeat = rp;
944
945 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
946 return 0;
947
948 TRACE(("%8d: match item (optional)\n", PTR(ptr)));
949 rp->count = count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000950 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000951 if (i)
952 return i;
953 rp->count = count - 1;
954 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000955
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000956 default:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000957 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
958 return SRE_ERROR_ILLEGAL;
959 }
960 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000961
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000962 /* shouldn't end up here */
963 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000964}
965
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000966static int
Guido van Rossumb700df92000-03-31 14:59:30 +0000967SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
968{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000969 SRE_CHAR* ptr = state->start;
970 SRE_CHAR* end = state->end;
971 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +0000972 int prefix_len = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +0000973 SRE_CODE* prefix = NULL;
974 SRE_CODE* charset = NULL;
975 SRE_CODE* overlap = NULL;
976 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000977
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000978 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000979 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000980 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000981
982 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000983
984 if (pattern[3] > 0) {
985 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +0000986 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000987 end -= pattern[3]-1;
988 if (end <= ptr)
989 end = ptr+1;
990 }
991
Fredrik Lundh3562f112000-07-02 12:00:07 +0000992 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000993 /* pattern starts with a known prefix */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000994 prefix_len = pattern[5];
995 prefix = pattern + 6;
996 overlap = prefix + prefix_len - 1;
997 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000998 /* pattern starts with a character from a known set */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000999 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001000
1001 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001002 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001003
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001004#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001005 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001006 /* pattern starts with a known prefix. use the overlap
1007 table to skip forward as fast as we possibly can */
1008 int i = 0;
1009 end = state->end;
1010 while (ptr < end) {
1011 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001012 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001013 if (!i)
1014 break;
1015 else
1016 i = overlap[i];
1017 } else {
1018 if (++i == prefix_len) {
1019 /* found a potential match */
1020 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
1021 state->start = ptr - prefix_len + 1;
1022 state->ptr = ptr + 1;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001023 if (flags & SRE_INFO_LITERAL)
1024 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001025 status = SRE_MATCH(state, pattern + 2*prefix_len, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001026 if (status != 0)
1027 return status;
1028 /* close but no cigar -- try again */
1029 i = overlap[i];
1030 }
1031 break;
1032 }
1033
1034 }
1035 ptr++;
1036 }
1037 return 0;
1038 }
1039#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001040
Fredrik Lundh3562f112000-07-02 12:00:07 +00001041 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001042 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001043 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001044 SRE_CODE chr = pattern[1];
1045 for (;;) {
1046 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1047 ptr++;
1048 if (ptr == end)
1049 return 0;
1050 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
1051 state->start = ptr;
1052 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001053 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001054 if (status != 0)
1055 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001056 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001057 } else if (charset) {
1058 /* pattern starts with a character from a known set */
1059 for (;;) {
1060 while (ptr < end && !SRE_MEMBER(charset, ptr[0]))
1061 ptr++;
1062 if (ptr == end)
1063 return 0;
1064 TRACE(("%8d: === SEARCH === charset\n", PTR(ptr)));
1065 state->start = ptr;
1066 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001067 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001068 if (status != 0)
1069 break;
1070 }
1071 } else
1072 /* general case */
1073 while (ptr <= end) {
1074 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
1075 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001076 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001077 if (status != 0)
1078 break;
1079 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001080
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001081 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001082}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001083
Guido van Rossumb700df92000-03-31 14:59:30 +00001084
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001085#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001086
1087/* -------------------------------------------------------------------- */
1088/* factories and destructors */
1089
1090/* see sre.h for object declarations */
1091
1092staticforward PyTypeObject Pattern_Type;
1093staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001094staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001095
1096static PyObject *
1097_compile(PyObject* self_, PyObject* args)
1098{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001099 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001100
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001101 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001102 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001103
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001104 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001105 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001106 PyObject* code;
1107 int groups = 0;
1108 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001109 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001110 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001111 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001112 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001113
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001114 code = PySequence_Fast(code, "code argument must be a sequence");
1115 if (!code)
1116 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001117
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001118#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001119 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001120#else
1121 n = PySequence_Length(code);
1122#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001123
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001124 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1125 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001126 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001127 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001128 }
1129
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001130 for (i = 0; i < n; i++) {
1131 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001132 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001133 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001134
1135 Py_DECREF(code);
1136
1137 if (PyErr_Occurred())
1138 return NULL;
1139
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001140 Py_INCREF(pattern);
1141 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001142
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001143 self->flags = flags;
1144
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001145 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001146
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001147 Py_XINCREF(groupindex);
1148 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001149
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001150 Py_XINCREF(indexgroup);
1151 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001152
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001153 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001154}
1155
1156static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001157sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001158{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001159 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001160}
1161
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001162static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001163sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001164{
1165 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001166 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001167 return NULL;
1168 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001169 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001170#if defined(HAVE_UNICODE)
1171 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001172 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001173#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001174 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001175}
1176
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001177LOCAL(void)
1178state_reset(SRE_STATE* state)
1179{
1180 int i;
1181
1182 state->lastmark = 0;
1183
1184 /* FIXME: dynamic! */
1185 for (i = 0; i < SRE_MARK_SIZE; i++)
1186 state->mark[i] = NULL;
1187
1188 state->lastindex = -1;
1189
1190 state->repeat = NULL;
1191
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001192 /* FIXME: debugging */
1193 state->maxlevel = 0;
1194
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001195 mark_fini(state);
1196}
1197
Guido van Rossumb700df92000-03-31 14:59:30 +00001198LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001199state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1200 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001201{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001202 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001203
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001204 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001205 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001206 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001207
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001208 /* get pointer to string buffer */
1209 buffer = string->ob_type->tp_as_buffer;
1210 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1211 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001212 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001213 return NULL;
1214 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001215
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001216 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001217 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1218 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001219 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1220 return NULL;
1221 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001222
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001223 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001224
1225#if PY_VERSION_HEX >= 0x01060000
1226 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001227#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001228 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001229#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001230
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001231 if (PyString_Check(string) || bytes == size)
1232 state->charsize = 1;
1233#if defined(HAVE_UNICODE)
1234 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1235 state->charsize = sizeof(Py_UNICODE);
1236#endif
1237 else {
1238 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1239 return NULL;
1240 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001241
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 /* adjust boundaries */
1243 if (start < 0)
1244 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001245 else if (start > size)
1246 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001247
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001248 if (end < 0)
1249 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001250 else if (end > size)
1251 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001252
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001253 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001254
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001255 state->start = (void*) ((char*) ptr + start * state->charsize);
1256 state->end = (void*) ((char*) ptr + end * state->charsize);
1257
1258 Py_INCREF(string);
1259 state->string = string;
1260 state->pos = start;
1261 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001262
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001263 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001264 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001265#if defined(HAVE_UNICODE)
1266 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001267 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001268#endif
1269 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001270 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001271
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001272 state->mark_stack = NULL;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001273 state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001274
1275 state_reset(state);
1276
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001277 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001278}
1279
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001280LOCAL(void)
1281state_fini(SRE_STATE* state)
1282{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001283 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001284 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001285}
1286
1287LOCAL(PyObject*)
1288state_getslice(SRE_STATE* state, int index, PyObject* string)
1289{
1290 index = (index - 1) * 2;
1291
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001292 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1293 Py_INCREF(Py_None);
1294 return Py_None;
1295 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001296
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001297 return PySequence_GetSlice(
1298 string,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001299 ((char*)state->mark[index] - (char*)state->beginning) /
1300 state->charsize,
1301 ((char*)state->mark[index+1] - (char*)state->beginning) /
1302 state->charsize
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001303 );
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001304}
1305
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001306static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001307pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001308{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001309 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001310
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001311 MatchObject* match;
1312 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001313 char* base;
1314 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001315
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001316 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001317
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001318 /* create match object (with room for extra group marks) */
1319 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001320 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001321 if (!match)
1322 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001323
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001324 Py_INCREF(pattern);
1325 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001326
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001327 Py_INCREF(state->string);
1328 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001329
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001330 match->regs = NULL;
1331 match->groups = pattern->groups+1;
1332
1333 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001334
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001335 base = (char*) state->beginning;
1336 n = state->charsize;
1337
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001338 match->mark[0] = ((char*) state->start - base) / n;
1339 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001340
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001341 for (i = j = 0; i < pattern->groups; i++, j+=2)
1342 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1343 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1344 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1345 } else
1346 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1347
1348 match->pos = state->pos;
1349 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001350
Fredrik Lundh6f013982000-07-03 18:44:21 +00001351 match->lastindex = state->lastindex;
1352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001353 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001354
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001355 } else if (status < 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 /* internal error */
1358 PyErr_SetString(
1359 PyExc_RuntimeError, "internal error in regular expression engine"
1360 );
1361 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001362
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001363 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001364
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001365 Py_INCREF(Py_None);
1366 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001367}
1368
1369static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001370pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001371{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001372 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001374 ScannerObject* self;
1375
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001376 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001377 int start = 0;
1378 int end = INT_MAX;
1379 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1380 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001382 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001383 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001384 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001385 return NULL;
1386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001387 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001388 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001389 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001390 return NULL;
1391 }
1392
1393 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001394 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001395
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001396 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001397}
1398
Guido van Rossumb700df92000-03-31 14:59:30 +00001399static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001400pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001401{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001402 Py_XDECREF(self->pattern);
1403 Py_XDECREF(self->groupindex);
1404 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001405}
1406
1407static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001408pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001409{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001410 SRE_STATE state;
1411 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001413 PyObject* string;
1414 int start = 0;
1415 int end = INT_MAX;
1416 if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
1417 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001418
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001419 string = state_init(&state, self, string, start, end);
1420 if (!string)
1421 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001423 state.ptr = state.start;
1424
1425 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001426 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001427 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001428#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001429 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001430#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001431 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001433 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001434
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001435 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001436}
1437
1438static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001439pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001440{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001441 SRE_STATE state;
1442 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001443
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001444 PyObject* string;
1445 int start = 0;
1446 int end = INT_MAX;
1447 if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
1448 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001449
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001450 string = state_init(&state, self, string, start, end);
1451 if (!string)
1452 return NULL;
1453
1454 if (state.charsize == 1) {
1455 status = sre_search(&state, PatternObject_GetCode(self));
1456 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001457#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001458 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001459#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001460 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001461
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001462 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001463
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001464 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001465}
1466
1467static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001468call(char* function, PyObject* args)
1469{
1470 PyObject* name;
1471 PyObject* module;
1472 PyObject* func;
1473 PyObject* result;
1474
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001475 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001476 if (!name)
1477 return NULL;
1478 module = PyImport_Import(name);
1479 Py_DECREF(name);
1480 if (!module)
1481 return NULL;
1482 func = PyObject_GetAttrString(module, function);
1483 Py_DECREF(module);
1484 if (!func)
1485 return NULL;
1486 result = PyObject_CallObject(func, args);
1487 Py_DECREF(func);
1488 Py_DECREF(args);
1489 return result;
1490}
1491
1492static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001493pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001494{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001495 PyObject* template;
1496 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001497 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001498 if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
1499 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001500
1501 /* delegate to Python code */
1502 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1503}
1504
1505static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001506pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001507{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001508 PyObject* template;
1509 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001510 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001511 if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
1512 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001513
1514 /* delegate to Python code */
1515 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1516}
1517
1518static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001519pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001520{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001521 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001522 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001523 if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
1524 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001525
1526 /* delegate to Python code */
1527 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1528}
1529
1530static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001531pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001532{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001533 SRE_STATE state;
1534 PyObject* list;
1535 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001536 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001537
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001538 PyObject* string;
1539 int start = 0;
1540 int end = INT_MAX;
1541 if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
1542 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001543
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001544 string = state_init(&state, self, string, start, end);
1545 if (!string)
1546 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001547
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001548 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001549
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001550 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001551
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001552 PyObject* item;
1553
1554 state.ptr = state.start;
1555
1556 if (state.charsize == 1) {
1557 status = sre_search(&state, PatternObject_GetCode(self));
1558 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001559#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001560 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001561#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001562 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001563
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001564 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001565
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001566 /* don't bother to build a match object */
1567 switch (self->groups) {
1568 case 0:
1569 item = PySequence_GetSlice(
1570 string,
1571 ((char*) state.start - (char*) state.beginning) /
1572 state.charsize,
1573 ((char*) state.ptr - (char*) state.beginning) /
1574 state.charsize);
1575 if (!item)
1576 goto error;
1577 break;
1578 case 1:
1579 item = state_getslice(&state, 1, string);
1580 if (!item)
1581 goto error;
1582 break;
1583 default:
1584 item = PyTuple_New(self->groups);
1585 if (!item)
1586 goto error;
1587 for (i = 0; i < self->groups; i++) {
1588 PyObject* o = state_getslice(&state, i+1, string);
1589 if (!o) {
1590 Py_DECREF(item);
1591 goto error;
1592 }
1593 PyTuple_SET_ITEM(item, i, o);
1594 }
1595 break;
1596 }
1597
1598 if (PyList_Append(list, item) < 0) {
1599 Py_DECREF(item);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001600 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001601 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001602
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001603 if (state.ptr == state.start)
1604 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001605 else
1606 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001607
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001608 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001609
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001610 if (status == 0)
1611 break;
1612
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001613 /* internal error */
1614 PyErr_SetString(
1615 PyExc_RuntimeError,
1616 "internal error in regular expression engine"
1617 );
1618 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001619
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001620 }
1621 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001622
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001623 state_fini(&state);
1624 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001625
1626error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001627 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001628 state_fini(&state);
1629 return NULL;
1630
Guido van Rossumb700df92000-03-31 14:59:30 +00001631}
1632
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001633static PyMethodDef pattern_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001634 {"match", (PyCFunction) pattern_match, 1},
1635 {"search", (PyCFunction) pattern_search, 1},
1636 {"sub", (PyCFunction) pattern_sub, 1},
1637 {"subn", (PyCFunction) pattern_subn, 1},
1638 {"split", (PyCFunction) pattern_split, 1},
1639 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001640 /* experimental */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001641 {"scanner", (PyCFunction) pattern_scanner, 1},
1642 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001643};
1644
1645static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001646pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001647{
1648 PyObject* res;
1649
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001650 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001651
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001652 if (res)
1653 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001654
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001655 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001656
1657 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001658 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001659 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001660 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001661 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001662
1663 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001664 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001665
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001666 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001667 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001668
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001669 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001670 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001671 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001672 }
1673
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001674 PyErr_SetString(PyExc_AttributeError, name);
1675 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001676}
1677
1678statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001679 PyObject_HEAD_INIT(NULL)
1680 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001681 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001682 (destructor)pattern_dealloc, /*tp_dealloc*/
1683 0, /*tp_print*/
1684 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001685};
1686
1687/* -------------------------------------------------------------------- */
1688/* match methods */
1689
1690static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001691match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001692{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 Py_XDECREF(self->regs);
1694 Py_XDECREF(self->string);
1695 Py_DECREF(self->pattern);
1696 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001697}
1698
1699static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001700match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001701{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 if (index < 0 || index >= self->groups) {
1703 /* raise IndexError if we were given a bad group number */
1704 PyErr_SetString(
1705 PyExc_IndexError,
1706 "no such group"
1707 );
1708 return NULL;
1709 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001710
Fredrik Lundh6f013982000-07-03 18:44:21 +00001711 index *= 2;
1712
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001713 if (self->string == Py_None || self->mark[index] < 0) {
1714 /* return default value if the string or group is undefined */
1715 Py_INCREF(def);
1716 return def;
1717 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001718
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001719 return PySequence_GetSlice(
1720 self->string, self->mark[index], self->mark[index+1]
1721 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001722}
1723
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001724static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001725match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001726{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001727 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001728
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001729 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001730 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001731
Fredrik Lundh6f013982000-07-03 18:44:21 +00001732 i = -1;
1733
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001734 if (self->pattern->groupindex) {
1735 index = PyObject_GetItem(self->pattern->groupindex, index);
1736 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001737 if (PyInt_Check(index))
1738 i = (int) PyInt_AS_LONG(index);
1739 Py_DECREF(index);
1740 } else
1741 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001742 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001743
1744 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001745}
1746
1747static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001748match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001749{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001750 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001751}
1752
1753static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001754match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001755{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001756 PyObject* result;
1757 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001759 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001760
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001761 switch (size) {
1762 case 0:
1763 result = match_getslice(self, Py_False, Py_None);
1764 break;
1765 case 1:
1766 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1767 break;
1768 default:
1769 /* fetch multiple items */
1770 result = PyTuple_New(size);
1771 if (!result)
1772 return NULL;
1773 for (i = 0; i < size; i++) {
1774 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001775 self, PyTuple_GET_ITEM(args, i), Py_None
1776 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001777 if (!item) {
1778 Py_DECREF(result);
1779 return NULL;
1780 }
1781 PyTuple_SET_ITEM(result, i, item);
1782 }
1783 break;
1784 }
1785 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001786}
1787
1788static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001789match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001790{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001791 PyObject* result;
1792 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001793
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001794 PyObject* def = Py_None;
1795 if (!PyArg_ParseTuple(args, "|O:groups", &def))
1796 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001798 result = PyTuple_New(self->groups-1);
1799 if (!result)
1800 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001801
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001802 for (index = 1; index < self->groups; index++) {
1803 PyObject* item;
1804 item = match_getslice_by_index(self, index, def);
1805 if (!item) {
1806 Py_DECREF(result);
1807 return NULL;
1808 }
1809 PyTuple_SET_ITEM(result, index-1, item);
1810 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001811
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001812 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001813}
1814
1815static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001816match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001817{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001818 PyObject* result;
1819 PyObject* keys;
1820 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001821
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001822 PyObject* def = Py_None;
1823 if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
1824 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001825
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001826 result = PyDict_New();
1827 if (!result || !self->pattern->groupindex)
1828 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001829
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001830 keys = PyMapping_Keys(self->pattern->groupindex);
1831 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001832 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001833 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001834 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001835
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001836 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1837 PyObject* key;
1838 PyObject* item;
1839 key = PyList_GET_ITEM(keys, index);
1840 if (!key) {
1841 Py_DECREF(keys);
1842 Py_DECREF(result);
1843 return NULL;
1844 }
1845 item = match_getslice(self, key, def);
1846 if (!item) {
1847 Py_DECREF(key);
1848 Py_DECREF(keys);
1849 Py_DECREF(result);
1850 return NULL;
1851 }
1852 /* FIXME: <fl> this can fail, right? */
1853 PyDict_SetItem(result, key, item);
1854 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001855
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001856 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001857
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001858 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001859}
1860
1861static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001862match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001863{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001864 int index;
1865
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001866 PyObject* index_ = Py_False; /* zero */
1867 if (!PyArg_ParseTuple(args, "|O:start", &index_))
1868 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001869
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001870 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001871
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 if (index < 0 || index >= self->groups) {
1873 PyErr_SetString(
1874 PyExc_IndexError,
1875 "no such group"
1876 );
1877 return NULL;
1878 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001879
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001880 if (self->mark[index*2] < 0) {
1881 Py_INCREF(Py_None);
1882 return Py_None;
1883 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001884
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001885 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00001886}
1887
1888static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001889match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001890{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001891 int index;
1892
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 PyObject* index_ = Py_False; /* zero */
1894 if (!PyArg_ParseTuple(args, "|O:end", &index_))
1895 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001896
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001897 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001898
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001899 if (index < 0 || index >= self->groups) {
1900 PyErr_SetString(
1901 PyExc_IndexError,
1902 "no such group"
1903 );
1904 return NULL;
1905 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001906
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001907 if (self->mark[index*2] < 0) {
1908 Py_INCREF(Py_None);
1909 return Py_None;
1910 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001911
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001912 return Py_BuildValue("i", self->mark[index*2+1]);
1913}
1914
1915LOCAL(PyObject*)
1916_pair(int i1, int i2)
1917{
1918 PyObject* pair;
1919 PyObject* item;
1920
1921 pair = PyTuple_New(2);
1922 if (!pair)
1923 return NULL;
1924
1925 item = PyInt_FromLong(i1);
1926 if (!item)
1927 goto error;
1928 PyTuple_SET_ITEM(pair, 0, item);
1929
1930 item = PyInt_FromLong(i2);
1931 if (!item)
1932 goto error;
1933 PyTuple_SET_ITEM(pair, 1, item);
1934
1935 return pair;
1936
1937 error:
1938 Py_DECREF(pair);
1939 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001940}
1941
1942static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001943match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001944{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001945 int index;
1946
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001947 PyObject* index_ = Py_False; /* zero */
1948 if (!PyArg_ParseTuple(args, "|O:span", &index_))
1949 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001950
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001951 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001952
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001953 if (index < 0 || index >= self->groups) {
1954 PyErr_SetString(
1955 PyExc_IndexError,
1956 "no such group"
1957 );
1958 return NULL;
1959 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001960
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001961 if (self->mark[index*2] < 0) {
1962 Py_INCREF(Py_None);
1963 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001964 return Py_BuildValue("OO", Py_None, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001965 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001966
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001967 return _pair(self->mark[index*2], self->mark[index*2+1]);
1968}
1969
1970static PyObject*
1971match_regs(MatchObject* self)
1972{
1973 PyObject* regs;
1974 PyObject* item;
1975 int index;
1976
1977 regs = PyTuple_New(self->groups);
1978 if (!regs)
1979 return NULL;
1980
1981 for (index = 0; index < self->groups; index++) {
1982 item = _pair(self->mark[index*2], self->mark[index*2+1]);
1983 if (!item) {
1984 Py_DECREF(regs);
1985 return NULL;
1986 }
1987 PyTuple_SET_ITEM(regs, index, item);
1988 }
1989
1990 Py_INCREF(regs);
1991 self->regs = regs;
1992
1993 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00001994}
1995
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001996static PyMethodDef match_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001997 {"group", (PyCFunction) match_group, 1},
1998 {"start", (PyCFunction) match_start, 1},
1999 {"end", (PyCFunction) match_end, 1},
2000 {"span", (PyCFunction) match_span, 1},
2001 {"groups", (PyCFunction) match_groups, 1},
2002 {"groupdict", (PyCFunction) match_groupdict, 1},
2003 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002004};
2005
2006static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002007match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002008{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002009 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002010
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002011 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2012 if (res)
2013 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002014
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002015 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002016
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002017 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002018 if (self->lastindex >= 0)
2019 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002020 Py_INCREF(Py_None);
2021 return Py_None;
2022 }
2023
2024 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002025 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002026 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002027 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002028 );
2029 if (result)
2030 return result;
2031 PyErr_Clear();
2032 }
2033 Py_INCREF(Py_None);
2034 return Py_None;
2035 }
2036
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002037 if (!strcmp(name, "string")) {
2038 if (self->string) {
2039 Py_INCREF(self->string);
2040 return self->string;
2041 } else {
2042 Py_INCREF(Py_None);
2043 return Py_None;
2044 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002045 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002046
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002047 if (!strcmp(name, "regs")) {
2048 if (self->regs) {
2049 Py_INCREF(self->regs);
2050 return self->regs;
2051 } else
2052 return match_regs(self);
2053 }
2054
2055 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002056 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002057 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002058 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002059
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 if (!strcmp(name, "pos"))
2061 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002062
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002063 if (!strcmp(name, "endpos"))
2064 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002065
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 PyErr_SetString(PyExc_AttributeError, name);
2067 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002068}
2069
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2072
2073statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002074 PyObject_HEAD_INIT(NULL)
2075 0, "SRE_Match",
2076 sizeof(MatchObject), sizeof(int),
2077 (destructor)match_dealloc, /*tp_dealloc*/
2078 0, /*tp_print*/
2079 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002080};
2081
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002082/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002083/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002084
2085static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002086scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002087{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002088 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002089 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002090 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002091}
2092
2093static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002094scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002095{
2096 SRE_STATE* state = &self->state;
2097 PyObject* match;
2098 int status;
2099
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002100 state_reset(state);
2101
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002102 state->ptr = state->start;
2103
2104 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002105 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002106 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002107#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002108 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002109#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002110 }
2111
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002112 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002113 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002114
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002115 if (status == 0 || state->ptr == state->start)
2116 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002117 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002118 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002119
2120 return match;
2121}
2122
2123
2124static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002125scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002126{
2127 SRE_STATE* state = &self->state;
2128 PyObject* match;
2129 int status;
2130
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002131 state_reset(state);
2132
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002133 state->ptr = state->start;
2134
2135 if (state->charsize == 1) {
2136 status = sre_search(state, PatternObject_GetCode(self->pattern));
2137 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002138#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002139 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002140#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002141 }
2142
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002143 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002145
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002146 if (status == 0 || state->ptr == state->start)
2147 state->start = (void*) ((char*) state->ptr + state->charsize);
2148 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002149 state->start = state->ptr;
2150
2151 return match;
2152}
2153
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002154static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002155 {"match", (PyCFunction) scanner_match, 0},
2156 {"search", (PyCFunction) scanner_search, 0},
2157 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002158};
2159
2160static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002161scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002162{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002164
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002165 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2166 if (res)
2167 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002168
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002169 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002170
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002171 /* attributes */
2172 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002173 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002174 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002175 }
2176
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002177 PyErr_SetString(PyExc_AttributeError, name);
2178 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002179}
2180
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002181statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002182 PyObject_HEAD_INIT(NULL)
2183 0, "SRE_Scanner",
2184 sizeof(ScannerObject), 0,
2185 (destructor)scanner_dealloc, /*tp_dealloc*/
2186 0, /*tp_print*/
2187 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002188};
2189
Guido van Rossumb700df92000-03-31 14:59:30 +00002190static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002191 {"compile", _compile, 1},
2192 {"getcodesize", sre_codesize, 1},
2193 {"getlower", sre_getlower, 1},
2194 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002195};
2196
2197void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002198#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002199__declspec(dllexport)
2200#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002201init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002202{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002203 /* Patch object types */
2204 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002205 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002206
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002207 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002208}
2209
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002210#endif /* !defined(SRE_RECURSIVE) */