/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of
 * 2000-06-30 fl  added fast search optimization
 * 2000-06-30 fl  added assert (lookahead) primitives, etc
 * 2000-07-02 fl  added charset optimizations, etc
 * 2000-07-03 fl  store code in pattern object, lookbehind, etc
 * 2000-07-08 fl  added regs attribute
 * 2000-07-21 fl  reset lastindex in scanner methods
 * 2000-08-01 fl  fixes for 1.6b1
 * 2000-08-03 fl  added recursion limit
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-08-08 fl  changed findall to return empty strings instead of None
 * 2000-08-27 fl  properly propagate memory errors
 * 2000-09-02 fl  return -1 instead of None for start/end/span
 * 2000-09-20 fl  added expand method
 * 2000-09-21 fl  don't use the buffer interface for unicode strings
 * 2000-10-03 fl  fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl  really fixed assert_not; reset groups in findall
 * 2000-12-21 fl  fixed memory leak in groupdict
 * 2001-01-02 fl  properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl  avoid recursion for MIN_UNTIL; fixed uppercase literal bug
 * 2001-01-16 fl  fixed memory leak in pattern destructor
 * 2001-03-20 fl  lots of fixes for 2.1b2
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

42#ifndef SRE_RECURSIVE
43
/* version and copyright banner string for this release of the engine */
char copyright[] = " SRE 2.1b2 Copyright (c) 1997-2001 by Secret Labs AB ";

#include "Python.h"

#include "sre.h"

#include <ctype.h>

/* name of this module, minus the leading underscore */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* defining this one enables tracing */
#undef VERBOSE

#if PY_VERSION_HEX >= 0x01060000
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif

/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */

#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* older interpreters have no PyObject_DEL; map it onto PyMem_DEL there */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local function returning `type`, using the
   cheapest calling convention the compiler offers */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */

/* TRACE takes a parenthesized printf argument list: TRACE(("x=%d\n", x));
   it expands to nothing unless VERBOSE is defined */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* -------------------------------------------------------------------- */
/* search engine state */

/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* per-character bit set of the SRE_*_MASK flags above, indexed by 7-bit
   character code; read-only, hence const */
static const char sre_char_info[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };

/* lower-case mapping for 7-bit character codes: identity everywhere
   except 65..90 ('A'..'Z'), which map to 97..122 ('a'..'z') */
static const char sre_char_lower[128] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93,
    94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
    125, 126, 127 };

/* each predicate yields the matching mask bit (non-zero) or 0; codes of
   128 and above are never members.  note that `ch` is evaluated twice. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* lower-case a character code using the 7-bit table; codes of 128 and
   above are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? (unsigned int) sre_char_lower[ch] : ch);
}

/* locale-specific character predicates */

/* these defer to the <ctype.h> classifiers for codes below 256 and report
   "not a member" for anything larger; `ch` may be evaluated more than once */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* lower-case a character code with tolower(); codes of 256 and above are
   returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int) tolower((int) (ch)) : ch);
}

/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)

/* thin wrappers over the Py_UNICODE_* classification macros; the argument
   is cast to Py_UNICODE before it is classified */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* lower-case a character code via Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif

Guido van Rossumb700df92000-03-31 14:59:30 +0000191LOCAL(int)
192sre_category(SRE_CODE category, unsigned int ch)
193{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000194 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000195
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000196 case SRE_CATEGORY_DIGIT:
197 return SRE_IS_DIGIT(ch);
198 case SRE_CATEGORY_NOT_DIGIT:
199 return !SRE_IS_DIGIT(ch);
200 case SRE_CATEGORY_SPACE:
201 return SRE_IS_SPACE(ch);
202 case SRE_CATEGORY_NOT_SPACE:
203 return !SRE_IS_SPACE(ch);
204 case SRE_CATEGORY_WORD:
205 return SRE_IS_WORD(ch);
206 case SRE_CATEGORY_NOT_WORD:
207 return !SRE_IS_WORD(ch);
208 case SRE_CATEGORY_LINEBREAK:
209 return SRE_IS_LINEBREAK(ch);
210 case SRE_CATEGORY_NOT_LINEBREAK:
211 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000212
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000213 case SRE_CATEGORY_LOC_WORD:
214 return SRE_LOC_IS_WORD(ch);
215 case SRE_CATEGORY_LOC_NOT_WORD:
216 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000217
218#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000219 case SRE_CATEGORY_UNI_DIGIT:
220 return SRE_UNI_IS_DIGIT(ch);
221 case SRE_CATEGORY_UNI_NOT_DIGIT:
222 return !SRE_UNI_IS_DIGIT(ch);
223 case SRE_CATEGORY_UNI_SPACE:
224 return SRE_UNI_IS_SPACE(ch);
225 case SRE_CATEGORY_UNI_NOT_SPACE:
226 return !SRE_UNI_IS_SPACE(ch);
227 case SRE_CATEGORY_UNI_WORD:
228 return SRE_UNI_IS_WORD(ch);
229 case SRE_CATEGORY_UNI_NOT_WORD:
230 return !SRE_UNI_IS_WORD(ch);
231 case SRE_CATEGORY_UNI_LINEBREAK:
232 return SRE_UNI_IS_LINEBREAK(ch);
233 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
234 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000235#else
236 case SRE_CATEGORY_UNI_DIGIT:
237 return SRE_IS_DIGIT(ch);
238 case SRE_CATEGORY_UNI_NOT_DIGIT:
239 return !SRE_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_SPACE:
241 return SRE_IS_SPACE(ch);
242 case SRE_CATEGORY_UNI_NOT_SPACE:
243 return !SRE_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_WORD:
245 return SRE_LOC_IS_WORD(ch);
246 case SRE_CATEGORY_UNI_NOT_WORD:
247 return !SRE_LOC_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_LINEBREAK:
249 return SRE_IS_LINEBREAK(ch);
250 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
251 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000252#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000253 }
254 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000255}
256
257/* helpers */
258
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000259static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000260mark_fini(SRE_STATE* state)
261{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000262 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000263 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000264 state->mark_stack = NULL;
265 }
266 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000267}
268
269static int
270mark_save(SRE_STATE* state, int lo, int hi)
271{
272 void* stack;
273 int size;
274 int minsize, newsize;
275
276 if (hi <= lo)
277 return 0;
278
279 size = (hi - lo) + 1;
280
281 newsize = state->mark_stack_size;
282 minsize = state->mark_stack_base + size;
283
284 if (newsize < minsize) {
285 /* create new stack */
286 if (!newsize) {
287 newsize = 512;
288 if (newsize < minsize)
289 newsize = minsize;
290 TRACE(("allocate stack %d\n", newsize));
291 stack = malloc(sizeof(void*) * newsize);
292 } else {
293 /* grow the stack */
294 while (newsize < minsize)
295 newsize += newsize;
296 TRACE(("grow stack to %d\n", newsize));
297 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
298 }
299 if (!stack) {
300 mark_fini(state);
301 return SRE_ERROR_MEMORY;
302 }
303 state->mark_stack = stack;
304 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000305 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000306
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000307 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000308
309 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
310 size * sizeof(void*));
311
312 state->mark_stack_base += size;
313
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000314 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000315}
316
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000317static int
318mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000319{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000320 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000321
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000322 if (hi <= lo)
323 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000324
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000325 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000326
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000327 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000328
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000329 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000330
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000331 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
332 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000333
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000334 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000335}
336
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000337/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000338
339#define SRE_CHAR unsigned char
340#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000341#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000342#define SRE_CHARSET sre_charset
343#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000344#define SRE_MATCH sre_match
345#define SRE_SEARCH sre_search
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000346
347#if defined(HAVE_UNICODE)
348
Guido van Rossumb700df92000-03-31 14:59:30 +0000349#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000350#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000351#undef SRE_RECURSIVE
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000352
Guido van Rossumb700df92000-03-31 14:59:30 +0000353#undef SRE_SEARCH
354#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000355#undef SRE_INFO
356#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000357#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000358#undef SRE_AT
359#undef SRE_CHAR
360
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000361/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000362
363#define SRE_CHAR Py_UNICODE
364#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000365#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000366#define SRE_CHARSET sre_ucharset
367#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000368#define SRE_MATCH sre_umatch
369#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000370#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000371
372#endif /* SRE_RECURSIVE */
373
374/* -------------------------------------------------------------------- */
375/* String matching engine */
376
377/* the following section is compiled twice, with different character
378 settings */
379
380LOCAL(int)
381SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
382{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000383 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000387 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000389 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000390 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000391 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 case SRE_AT_BEGINNING_LINE:
394 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000395 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000396
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000398 return (((void*) (ptr+1) == state->end &&
399 SRE_IS_LINEBREAK((int) ptr[0])) ||
400 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000401
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 case SRE_AT_END_LINE:
403 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000404 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000405
Fredrik Lundh770617b2001-01-14 15:06:11 +0000406 case SRE_AT_END_STRING:
407 return ((void*) ptr == state->end);
408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 case SRE_AT_BOUNDARY:
410 if (state->beginning == state->end)
411 return 0;
412 that = ((void*) ptr > state->beginning) ?
413 SRE_IS_WORD((int) ptr[-1]) : 0;
414 this = ((void*) ptr < state->end) ?
415 SRE_IS_WORD((int) ptr[0]) : 0;
416 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000417
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000418 case SRE_AT_NON_BOUNDARY:
419 if (state->beginning == state->end)
420 return 0;
421 that = ((void*) ptr > state->beginning) ?
422 SRE_IS_WORD((int) ptr[-1]) : 0;
423 this = ((void*) ptr < state->end) ?
424 SRE_IS_WORD((int) ptr[0]) : 0;
425 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000426
427 case SRE_AT_LOC_BOUNDARY:
428 if (state->beginning == state->end)
429 return 0;
430 that = ((void*) ptr > state->beginning) ?
431 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
432 this = ((void*) ptr < state->end) ?
433 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
434 return this != that;
435
436 case SRE_AT_LOC_NON_BOUNDARY:
437 if (state->beginning == state->end)
438 return 0;
439 that = ((void*) ptr > state->beginning) ?
440 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
441 this = ((void*) ptr < state->end) ?
442 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
443 return this == that;
444
445 case SRE_AT_UNI_BOUNDARY:
446 if (state->beginning == state->end)
447 return 0;
448 that = ((void*) ptr > state->beginning) ?
449 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
450 this = ((void*) ptr < state->end) ?
451 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
452 return this != that;
453
454 case SRE_AT_UNI_NON_BOUNDARY:
455 if (state->beginning == state->end)
456 return 0;
457 that = ((void*) ptr > state->beginning) ?
458 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
459 this = ((void*) ptr < state->end) ?
460 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
461 return this == that;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000462 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000463
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000464 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000465}
466
467LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000468SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000469{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000470 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000471
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000472 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000473
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000474 for (;;) {
475 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000476
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000477 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000478 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000479 if (ch == set[0])
480 return ok;
481 set++;
482 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000483
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000484 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000485 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000486 if (set[0] <= ch && ch <= set[1])
487 return ok;
488 set += 2;
489 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000490
Fredrik Lundh3562f112000-07-02 12:00:07 +0000491 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000492 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000493 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
494 return ok;
495 set += 16;
496 break;
497
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000498 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000499 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 if (sre_category(set[0], (int) ch))
501 return ok;
502 set += 1;
503 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000504
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000505 case SRE_OP_NEGATE:
506 ok = !ok;
507 break;
508
509 case SRE_OP_FAILURE:
510 return !ok;
511
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000512 default:
513 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000514 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000515 return 0;
516 }
517 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000518}
519
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000520LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
521
522LOCAL(int)
523SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
524{
525 SRE_CODE chr;
526 SRE_CHAR* ptr = state->ptr;
527 SRE_CHAR* end = state->end;
528 int i;
529
530 /* adjust end */
531 if (maxcount < end - ptr && maxcount != 65535)
532 end = ptr + maxcount;
533
534 switch (pattern[0]) {
535
536 case SRE_OP_ANY:
537 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000538 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000539 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
540 ptr++;
541 break;
542
543 case SRE_OP_ANY_ALL:
544 /* repeated dot wildcare. skip to the end of the target
545 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000546 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000547 ptr = end;
548 break;
549
550 case SRE_OP_LITERAL:
551 /* repeated literal */
552 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000553 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000554 while (ptr < end && (SRE_CODE) *ptr == chr)
555 ptr++;
556 break;
557
558 case SRE_OP_LITERAL_IGNORE:
559 /* repeated literal */
560 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000561 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000562 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
563 ptr++;
564 break;
565
566 case SRE_OP_NOT_LITERAL:
567 /* repeated non-literal */
568 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000569 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000570 while (ptr < end && (SRE_CODE) *ptr != chr)
571 ptr++;
572 break;
573
574 case SRE_OP_NOT_LITERAL_IGNORE:
575 /* repeated non-literal */
576 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000577 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000578 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
579 ptr++;
580 break;
581
582 case SRE_OP_IN:
583 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000584 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
585 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000586 ptr++;
587 break;
588
589 default:
590 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000591 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000592 while ((SRE_CHAR*) state->ptr < end) {
593 i = SRE_MATCH(state, pattern, level);
594 if (i < 0)
595 return i;
596 if (!i)
597 break;
598 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000599 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
600 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000601 return (SRE_CHAR*) state->ptr - ptr;
602 }
603
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000604 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000605 return ptr - (SRE_CHAR*) state->ptr;
606}
607
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif

636LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000637SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000638{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000639 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000640 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000641
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000642 SRE_CHAR* end = state->end;
643 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000644 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000645 SRE_REPEAT* rp;
646 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000647 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000648
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000649 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000650
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000651 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000652
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000653#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000654 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000655 return SRE_ERROR_RECURSION_LIMIT;
656#endif
657
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000658#if defined(USE_RECURSION_LIMIT)
659 if (level > USE_RECURSION_LIMIT)
660 return SRE_ERROR_RECURSION_LIMIT;
661#endif
662
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000663 if (pattern[0] == SRE_OP_INFO) {
664 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000665 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000666 if (pattern[3] && (end - ptr) < pattern[3]) {
667 TRACE(("reject (got %d chars, need %d)\n",
668 (end - ptr), pattern[3]));
669 return 0;
670 }
671 pattern += pattern[1] + 1;
672 }
673
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000677
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 case SRE_OP_FAILURE:
679 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000680 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000681 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000682
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000683 case SRE_OP_SUCCESS:
684 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000686 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000687 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000688
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 case SRE_OP_AT:
690 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000691 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000692 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000693 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000694 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 pattern++;
696 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000697
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 case SRE_OP_CATEGORY:
699 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000700 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000701 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000702 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000703 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000704 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000705 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000706 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000707
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000708 case SRE_OP_LITERAL:
709 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000710 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000711 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000712 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000713 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000714 pattern++;
715 ptr++;
716 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000717
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000718 case SRE_OP_NOT_LITERAL:
719 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000720 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000721 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000722 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000723 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 pattern++;
725 ptr++;
726 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000727
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000728 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000729 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000730 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000731 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000732 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
733 return 0;
734 ptr++;
735 break;
736
737 case SRE_OP_ANY_ALL:
738 /* match anything */
739 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000740 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000742 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 ptr++;
744 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000745
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 case SRE_OP_IN:
747 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000748 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000749 TRACE(("|%p|%p|IN\n", pattern, ptr));
750 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000751 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 pattern += pattern[0];
753 ptr++;
754 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000755
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000756 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000758 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 i = pattern[0];
760 {
761 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
762 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
763 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000764 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000765 while (p < e) {
766 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000767 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 p++; ptr++;
769 }
770 }
771 pattern++;
772 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000773
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000774 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000775 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000776 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000777 i = pattern[0];
778 {
779 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
780 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
781 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000782 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000783 while (p < e) {
784 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000785 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000786 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000787 p++; ptr++;
788 }
789 }
790 pattern++;
791 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000792
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000794 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000795 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000796 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000797 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000798 pattern++;
799 ptr++;
800 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000801
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000802 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000803 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000805 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000806 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000807 pattern++;
808 ptr++;
809 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000810
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000811 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000812 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000813 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000814 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000815 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 pattern += pattern[0];
817 ptr++;
818 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000819
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000820 case SRE_OP_MARK:
821 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000822 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000823 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000824 i = pattern[0];
825 if (i & 1)
826 state->lastindex = i/2 + 1;
827 if (i > state->lastmark)
828 state->lastmark = i;
829 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000830 pattern++;
831 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000832
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000833 case SRE_OP_JUMP:
834 case SRE_OP_INFO:
835 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000836 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000837 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000838 pattern += pattern[0];
839 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000840
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000841 case SRE_OP_ASSERT:
842 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000843 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000844 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000845 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000846 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000847 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000848 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000849 if (i <= 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000850 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000851 pattern += pattern[0];
852 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000853
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000854 case SRE_OP_ASSERT_NOT:
855 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000856 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000857 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000858 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000859 if (state->ptr >= state->beginning) {
860 i = SRE_MATCH(state, pattern + 2, level + 1);
861 if (i < 0)
862 return i;
863 if (i)
864 return 0;
865 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000866 pattern += pattern[0];
867 break;
868
869 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000870 /* alternation */
871 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000872 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000873 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000874 for (; pattern[0]; pattern += pattern[0]) {
875 if (pattern[1] == SRE_OP_LITERAL &&
876 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
877 continue;
878 if (pattern[1] == SRE_OP_IN &&
879 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
880 continue;
881 state->ptr = ptr;
882 i = SRE_MATCH(state, pattern + 1, level + 1);
883 if (i)
884 return i;
885 if (state->lastmark > lastmark) {
886 memset(
887 state->mark + lastmark + 1, 0,
888 (state->lastmark - lastmark) * sizeof(void*)
889 );
890 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000891 }
892 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000893 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000894
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000895 case SRE_OP_REPEAT_ONE:
896 /* match repeated sequence (maximizing regexp) */
897
898 /* this operator only works if the repeated item is
899 exactly one character wide, and we're not already
900 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000901 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000902
903 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
904
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000905 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000906 pattern[1], pattern[2]));
907
Fredrik Lundhe1869832000-08-01 22:47:49 +0000908 if (ptr + pattern[1] > end)
909 return 0; /* cannot match */
910
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000911 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000912
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000913 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
914 if (count < 0)
915 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000916
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000917 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000918
919 /* when we arrive here, count contains the number of
920 matches, and ptr points to the tail of the target
921 string. check if the rest of the pattern matches,
922 and backtrack if not. */
923
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000924 if (count < (int) pattern[1])
925 return 0;
926
927 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
928 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000929 state->ptr = ptr;
930 return 1;
931
932 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
933 /* tail starts with a literal. skip positions where
934 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000935 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000936 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000937 while (count >= (int) pattern[1] &&
938 (ptr >= end || *ptr != chr)) {
939 ptr--;
940 count--;
941 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000942 if (count < (int) pattern[1])
943 break;
944 state->ptr = ptr;
945 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000946 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000947 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000948 ptr--;
949 count--;
950 }
951
952 } else {
953 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000954 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000955 while (count >= (int) pattern[1]) {
956 state->ptr = ptr;
957 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000958 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000959 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000960 ptr--;
961 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000962 if (state->lastmark > lastmark) {
963 memset(
964 state->mark + lastmark + 1, 0,
965 (state->lastmark - lastmark) * sizeof(void*)
966 );
967 state->lastmark = lastmark;
968 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000969 }
970 }
971 return 0;
972
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000973 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000974 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +0000975 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000976 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000977 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000978 pattern[1], pattern[2]));
979
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000980 rep.count = -1;
981 rep.pattern = pattern;
982
983 /* install new repeat context */
984 rep.prev = state->repeat;
985 state->repeat = &rep;
986
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000987 state->ptr = ptr;
988 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000989
990 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000991
992 return i;
993
994 case SRE_OP_MAX_UNTIL:
995 /* maximizing repeat */
996 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
997
998 /* FIXME: we probably need to deal with zero-width
999 matches in here... */
1000
1001 rp = state->repeat;
1002 if (!rp)
1003 return SRE_ERROR_STATE;
1004
1005 state->ptr = ptr;
1006
1007 count = rp->count + 1;
1008
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001009 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001010
1011 if (count < rp->pattern[1]) {
1012 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001013 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001014 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001015 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001016 if (i)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001017 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001018 rp->count = count - 1;
1019 state->ptr = ptr;
1020 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001021 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001022
1023 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001024 /* we may have enough matches, but if we can
1025 match another item, do so */
1026 rp->count = count;
1027 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001028 i = mark_save(state, 0, lastmark);
1029 if (i < 0)
1030 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001031 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001032 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001033 if (i)
1034 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001035 i = mark_restore(state, 0, lastmark);
1036 if (i < 0)
1037 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001038 rp->count = count - 1;
1039 state->ptr = ptr;
1040 }
1041
1042 /* cannot match more repeated items here. make sure the
1043 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001044 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001045 i = SRE_MATCH(state, pattern, level + 1);
1046 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001047 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001048 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001049 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001050 return 0;
1051
1052 case SRE_OP_MIN_UNTIL:
1053 /* minimizing repeat */
1054 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1055
1056 rp = state->repeat;
1057 if (!rp)
1058 return SRE_ERROR_STATE;
1059
1060 count = rp->count + 1;
1061
Fredrik Lundh770617b2001-01-14 15:06:11 +00001062 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1063 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001064
1065 state->ptr = ptr;
1066
1067 if (count < rp->pattern[1]) {
1068 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001069 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001070 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001071 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001072 if (i)
1073 return i;
1074 rp->count = count-1;
1075 state->ptr = ptr;
1076 return 0;
1077 }
1078
1079 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001080 state->repeat = rp->prev;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +00001081 /* FIXME: the following fix doesn't always work (#133283) */
1082 if (0 && rp->pattern[2] == 65535) {
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001083 /* unbounded repeat */
1084 for (;;) {
1085 i = SRE_MATCH(state, pattern, level + 1);
1086 if (i || ptr >= end)
1087 break;
1088 state->ptr = ++ptr;
1089 }
1090 } else
1091 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001092 if (i) {
1093 /* free(rp); */
1094 return i;
1095 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001096
Fredrik Lundh770617b2001-01-14 15:06:11 +00001097 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001098 state->repeat = rp;
1099
1100 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1101 return 0;
1102
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001103 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001104 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001105 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001106 if (i)
1107 return i;
1108 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001109 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001110 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001111
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001112 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001113 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001114 return SRE_ERROR_ILLEGAL;
1115 }
1116 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001117
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001118 /* shouldn't end up here */
1119 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001120}
1121
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001122LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001123SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1124{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001125 SRE_CHAR* ptr = state->start;
1126 SRE_CHAR* end = state->end;
1127 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001128 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001129 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001130 SRE_CODE* prefix = NULL;
1131 SRE_CODE* charset = NULL;
1132 SRE_CODE* overlap = NULL;
1133 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001134
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001135 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001136 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001137 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001138
1139 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001140
1141 if (pattern[3] > 0) {
1142 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001143 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001144 end -= pattern[3]-1;
1145 if (end <= ptr)
1146 end = ptr+1;
1147 }
1148
Fredrik Lundh3562f112000-07-02 12:00:07 +00001149 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001150 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001151 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001152 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001153 prefix_skip = pattern[6];
1154 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001155 overlap = prefix + prefix_len - 1;
1156 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001157 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001158 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001159 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001160
1161 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001162 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001163
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001164 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1165 TRACE(("charset = %p\n", charset));
1166
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001167#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001168 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001169 /* pattern starts with a known prefix. use the overlap
1170 table to skip forward as fast as we possibly can */
1171 int i = 0;
1172 end = state->end;
1173 while (ptr < end) {
1174 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001175 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001176 if (!i)
1177 break;
1178 else
1179 i = overlap[i];
1180 } else {
1181 if (++i == prefix_len) {
1182 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001183 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1184 state->start = ptr + 1 - prefix_len;
1185 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001186 if (flags & SRE_INFO_LITERAL)
1187 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001188 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001189 if (status != 0)
1190 return status;
1191 /* close but no cigar -- try again */
1192 i = overlap[i];
1193 }
1194 break;
1195 }
1196
1197 }
1198 ptr++;
1199 }
1200 return 0;
1201 }
1202#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001203
Fredrik Lundh3562f112000-07-02 12:00:07 +00001204 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001205 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001206 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001207 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001208 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001209 for (;;) {
1210 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1211 ptr++;
1212 if (ptr == end)
1213 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001214 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001215 state->start = ptr;
1216 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001217 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001218 if (status != 0)
1219 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001220 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001221 } else if (charset) {
1222 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001223 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001224 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001225 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001226 ptr++;
1227 if (ptr == end)
1228 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001229 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001230 state->start = ptr;
1231 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001232 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001233 if (status != 0)
1234 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001235 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001236 }
1237 } else
1238 /* general case */
1239 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001240 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001241 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001242 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001243 if (status != 0)
1244 break;
1245 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001246
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001247 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001248}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001249
Guido van Rossumb700df92000-03-31 14:59:30 +00001250
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001251#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001252
1253/* -------------------------------------------------------------------- */
1254/* factories and destructors */
1255
1256/* see sre.h for object declarations */
1257
1258staticforward PyTypeObject Pattern_Type;
1259staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001260staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001261
1262static PyObject *
1263_compile(PyObject* self_, PyObject* args)
1264{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001265 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001266
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001267 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001268 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001269
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001270 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001271 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001272 PyObject* code;
1273 int groups = 0;
1274 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001275 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001276 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1277 &PyList_Type, &code, &groups,
1278 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001279 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001280
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001281 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001282
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001283 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001284 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001285 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001286
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001287 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001288 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001289 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001290 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001291
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001292 if (PyErr_Occurred()) {
1293 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001294 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001295 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001296
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001297 Py_INCREF(pattern);
1298 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001299
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001300 self->flags = flags;
1301
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001302 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001303
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001304 Py_XINCREF(groupindex);
1305 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001306
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001307 Py_XINCREF(indexgroup);
1308 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001309
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001310 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001311}
1312
1313static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001314sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001315{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001316 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001317}
1318
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001319static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001320sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001321{
1322 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001323 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001324 return NULL;
1325 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001326 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001327 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001328#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001329 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001330#else
1331 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001332#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001333 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001334}
1335
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001336LOCAL(void)
1337state_reset(SRE_STATE* state)
1338{
1339 int i;
1340
1341 state->lastmark = 0;
1342
1343 /* FIXME: dynamic! */
1344 for (i = 0; i < SRE_MARK_SIZE; i++)
1345 state->mark[i] = NULL;
1346
1347 state->lastindex = -1;
1348
1349 state->repeat = NULL;
1350
1351 mark_fini(state);
1352}
1353
Guido van Rossumb700df92000-03-31 14:59:30 +00001354LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001355state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1356 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001357{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001358 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001359
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001360 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001361 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001362 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001363
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001364 memset(state, 0, sizeof(SRE_STATE));
1365
1366 state->lastindex = -1;
1367
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001368#if defined(HAVE_UNICODE)
1369 if (PyUnicode_Check(string)) {
1370 /* unicode strings doesn't always support the buffer interface */
1371 ptr = (void*) PyUnicode_AS_DATA(string);
1372 bytes = PyUnicode_GET_DATA_SIZE(string);
1373 size = PyUnicode_GET_SIZE(string);
1374 state->charsize = sizeof(Py_UNICODE);
1375
1376 } else {
1377#endif
1378
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001379 /* get pointer to string buffer */
1380 buffer = string->ob_type->tp_as_buffer;
1381 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1382 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001383 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001384 return NULL;
1385 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001387 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001388 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1389 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001390 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1391 return NULL;
1392 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001393
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001394 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001395#if PY_VERSION_HEX >= 0x01060000
1396 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001397#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001398 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001399#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001400
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001401 if (PyString_Check(string) || bytes == size)
1402 state->charsize = 1;
1403#if defined(HAVE_UNICODE)
1404 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1405 state->charsize = sizeof(Py_UNICODE);
1406#endif
1407 else {
1408 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1409 return NULL;
1410 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001411
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001412#if defined(HAVE_UNICODE)
1413 }
1414#endif
1415
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001416 /* adjust boundaries */
1417 if (start < 0)
1418 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001419 else if (start > size)
1420 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001421
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001422 if (end < 0)
1423 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001424 else if (end > size)
1425 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001427 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001429 state->start = (void*) ((char*) ptr + start * state->charsize);
1430 state->end = (void*) ((char*) ptr + end * state->charsize);
1431
1432 Py_INCREF(string);
1433 state->string = string;
1434 state->pos = start;
1435 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001436
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001437 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001438 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001439 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001440#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001441 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001442#else
1443 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001444#endif
1445 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001446 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001447
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001448 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001449}
1450
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001451LOCAL(void)
1452state_fini(SRE_STATE* state)
1453{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001454 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001455 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001456}
1457
1458LOCAL(PyObject*)
1459state_getslice(SRE_STATE* state, int index, PyObject* string)
1460{
Fredrik Lundh58100642000-08-09 09:14:35 +00001461 int i, j;
1462
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001463 index = (index - 1) * 2;
1464
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001465 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001466 i = j = 0;
1467 } else {
1468 i = ((char*)state->mark[index] - (char*)state->beginning) /
1469 state->charsize;
1470 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1471 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001472 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001473
Fredrik Lundh58100642000-08-09 09:14:35 +00001474 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001475}
1476
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001477static void
1478pattern_error(int status)
1479{
1480 switch (status) {
1481 case SRE_ERROR_RECURSION_LIMIT:
1482 PyErr_SetString(
1483 PyExc_RuntimeError,
1484 "maximum recursion limit exceeded"
1485 );
1486 break;
1487 case SRE_ERROR_MEMORY:
1488 PyErr_NoMemory();
1489 break;
1490 default:
1491 /* other error codes indicate compiler/engine bugs */
1492 PyErr_SetString(
1493 PyExc_RuntimeError,
1494 "internal error in regular expression engine"
1495 );
1496 }
1497}
1498
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001499static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001500pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001501{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001502 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001503
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001504 MatchObject* match;
1505 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001506 char* base;
1507 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001508
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001509 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001510
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001511 /* create match object (with room for extra group marks) */
1512 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001513 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001514 if (!match)
1515 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001516
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001517 Py_INCREF(pattern);
1518 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001519
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001520 Py_INCREF(state->string);
1521 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001522
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001523 match->regs = NULL;
1524 match->groups = pattern->groups+1;
1525
1526 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001527
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001528 base = (char*) state->beginning;
1529 n = state->charsize;
1530
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001531 match->mark[0] = ((char*) state->start - base) / n;
1532 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001533
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001534 for (i = j = 0; i < pattern->groups; i++, j+=2)
1535 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1536 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1537 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1538 } else
1539 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1540
1541 match->pos = state->pos;
1542 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001543
Fredrik Lundh6f013982000-07-03 18:44:21 +00001544 match->lastindex = state->lastindex;
1545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001546 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001547
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001548 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001549
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001550 /* no match */
1551 Py_INCREF(Py_None);
1552 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001553
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001554 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001555
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001556 /* internal error */
1557 pattern_error(status);
1558 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001559}
1560
1561static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001562pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001563{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001564 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001565
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001566 ScannerObject* self;
1567
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001568 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001569 int start = 0;
1570 int end = INT_MAX;
1571 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1572 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001573
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001574 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001575 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001576 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001577 return NULL;
1578
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001579 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001580 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001581 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001582 return NULL;
1583 }
1584
1585 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001586 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001587
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001588 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001589}
1590
Guido van Rossumb700df92000-03-31 14:59:30 +00001591static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001592pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001593{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001594 Py_XDECREF(self->pattern);
1595 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001596 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001597 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001598}
1599
1600static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001601pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001602{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001603 SRE_STATE state;
1604 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001606 PyObject* string;
1607 int start = 0;
1608 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001609 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1610 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1611 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001612 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001613
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001614 string = state_init(&state, self, string, start, end);
1615 if (!string)
1616 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001617
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001618 state.ptr = state.start;
1619
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001620 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1621
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001622 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001623 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001624 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001625#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001626 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001627#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001628 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001629
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001630 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1631
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001632 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001633
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001634 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001635}
1636
1637static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001638pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001639{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001640 SRE_STATE state;
1641 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001642
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001643 PyObject* string;
1644 int start = 0;
1645 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001646 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1647 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1648 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001649 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001650
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001651 string = state_init(&state, self, string, start, end);
1652 if (!string)
1653 return NULL;
1654
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001655 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1656
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001657 if (state.charsize == 1) {
1658 status = sre_search(&state, PatternObject_GetCode(self));
1659 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001660#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001661 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001662#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001663 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001664
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001665 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1666
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001667 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001668
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001669 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001670}
1671
1672static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001673call(char* function, PyObject* args)
1674{
1675 PyObject* name;
1676 PyObject* module;
1677 PyObject* func;
1678 PyObject* result;
1679
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001680 name = PyString_FromString(SRE_MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001681 if (!name)
1682 return NULL;
1683 module = PyImport_Import(name);
1684 Py_DECREF(name);
1685 if (!module)
1686 return NULL;
1687 func = PyObject_GetAttrString(module, function);
1688 Py_DECREF(module);
1689 if (!func)
1690 return NULL;
1691 result = PyObject_CallObject(func, args);
1692 Py_DECREF(func);
1693 Py_DECREF(args);
1694 return result;
1695}
1696
1697static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001698pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001699{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001700 PyObject* template;
1701 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001702 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001703 static char* kwlist[] = { "repl", "string", "count", NULL };
1704 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1705 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001706 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001707
1708 /* delegate to Python code */
1709 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1710}
1711
1712static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001713pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001714{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001715 PyObject* template;
1716 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001717 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001718 static char* kwlist[] = { "repl", "string", "count", NULL };
1719 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1720 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001721 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001722
1723 /* delegate to Python code */
1724 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1725}
1726
1727static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001728pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001729{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001730 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001731 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001732 static char* kwlist[] = { "source", "maxsplit", NULL };
1733 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1734 &string, &maxsplit))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001735 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001736
1737 /* delegate to Python code */
1738 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1739}
1740
1741static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001742pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001743{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001744 SRE_STATE state;
1745 PyObject* list;
1746 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001747 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001749 PyObject* string;
1750 int start = 0;
1751 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001752 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1753 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1754 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001755 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001756
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001757 string = state_init(&state, self, string, start, end);
1758 if (!string)
1759 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001760
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001761 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001762
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001763 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001764
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001765 PyObject* item;
1766
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001767 state_reset(&state);
1768
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001769 state.ptr = state.start;
1770
1771 if (state.charsize == 1) {
1772 status = sre_search(&state, PatternObject_GetCode(self));
1773 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001774#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001775 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001776#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001777 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001778
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001779 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001780
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001781 /* don't bother to build a match object */
1782 switch (self->groups) {
1783 case 0:
1784 item = PySequence_GetSlice(
1785 string,
1786 ((char*) state.start - (char*) state.beginning) /
1787 state.charsize,
1788 ((char*) state.ptr - (char*) state.beginning) /
1789 state.charsize);
1790 if (!item)
1791 goto error;
1792 break;
1793 case 1:
1794 item = state_getslice(&state, 1, string);
1795 if (!item)
1796 goto error;
1797 break;
1798 default:
1799 item = PyTuple_New(self->groups);
1800 if (!item)
1801 goto error;
1802 for (i = 0; i < self->groups; i++) {
1803 PyObject* o = state_getslice(&state, i+1, string);
1804 if (!o) {
1805 Py_DECREF(item);
1806 goto error;
1807 }
1808 PyTuple_SET_ITEM(item, i, o);
1809 }
1810 break;
1811 }
1812
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001813 status = PyList_Append(list, item);
1814 Py_DECREF(item);
1815
1816 if (status < 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001817 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001818
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001819 if (state.ptr == state.start)
1820 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001821 else
1822 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001823
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001824 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001825
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001826 if (status == 0)
1827 break;
1828
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001829 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001830 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001831
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001832 }
1833 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001834
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001835 state_fini(&state);
1836 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001837
1838error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001839 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001840 state_fini(&state);
1841 return NULL;
1842
Guido van Rossumb700df92000-03-31 14:59:30 +00001843}
1844
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001845static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00001846 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1847 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1848 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1849 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1850 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1851 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001852 /* experimental */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001853 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001854 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001855};
1856
1857static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001858pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001859{
1860 PyObject* res;
1861
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001862 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001863
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001864 if (res)
1865 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001866
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001867 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001868
1869 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001870 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001871 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001873 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001874
1875 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001876 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001877
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001878 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001879 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001880
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001881 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001882 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001883 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001884 }
1885
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001886 PyErr_SetString(PyExc_AttributeError, name);
1887 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001888}
1889
1890statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001891 PyObject_HEAD_INIT(NULL)
1892 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001893 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001894 (destructor)pattern_dealloc, /*tp_dealloc*/
1895 0, /*tp_print*/
1896 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001897};
1898
1899/* -------------------------------------------------------------------- */
1900/* match methods */
1901
1902static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001903match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001904{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001905 Py_XDECREF(self->regs);
1906 Py_XDECREF(self->string);
1907 Py_DECREF(self->pattern);
1908 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001909}
1910
1911static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001912match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001913{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001914 if (index < 0 || index >= self->groups) {
1915 /* raise IndexError if we were given a bad group number */
1916 PyErr_SetString(
1917 PyExc_IndexError,
1918 "no such group"
1919 );
1920 return NULL;
1921 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001922
Fredrik Lundh6f013982000-07-03 18:44:21 +00001923 index *= 2;
1924
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001925 if (self->string == Py_None || self->mark[index] < 0) {
1926 /* return default value if the string or group is undefined */
1927 Py_INCREF(def);
1928 return def;
1929 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001930
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001931 return PySequence_GetSlice(
1932 self->string, self->mark[index], self->mark[index+1]
1933 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001934}
1935
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001936static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001937match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001938{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001939 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001940
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001941 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001942 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001943
Fredrik Lundh6f013982000-07-03 18:44:21 +00001944 i = -1;
1945
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001946 if (self->pattern->groupindex) {
1947 index = PyObject_GetItem(self->pattern->groupindex, index);
1948 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001949 if (PyInt_Check(index))
1950 i = (int) PyInt_AS_LONG(index);
1951 Py_DECREF(index);
1952 } else
1953 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001954 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001955
1956 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001957}
1958
1959static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001960match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001961{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001962 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001963}
1964
1965static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001966match_expand(MatchObject* self, PyObject* args)
1967{
1968 PyObject* template;
1969 if (!PyArg_ParseTuple(args, "O:expand", &template))
1970 return NULL;
1971
1972 /* delegate to Python code */
1973 return call(
1974 "_expand",
1975 Py_BuildValue("OOO", self->pattern, self, template)
1976 );
1977}
1978
1979static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001980match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001981{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001982 PyObject* result;
1983 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001984
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001985 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001986
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001987 switch (size) {
1988 case 0:
1989 result = match_getslice(self, Py_False, Py_None);
1990 break;
1991 case 1:
1992 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1993 break;
1994 default:
1995 /* fetch multiple items */
1996 result = PyTuple_New(size);
1997 if (!result)
1998 return NULL;
1999 for (i = 0; i < size; i++) {
2000 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002001 self, PyTuple_GET_ITEM(args, i), Py_None
2002 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002003 if (!item) {
2004 Py_DECREF(result);
2005 return NULL;
2006 }
2007 PyTuple_SET_ITEM(result, i, item);
2008 }
2009 break;
2010 }
2011 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002012}
2013
2014static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002015match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002016{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002017 PyObject* result;
2018 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002019
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002020 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002021 static char* kwlist[] = { "default", NULL };
2022 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002023 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002024
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002025 result = PyTuple_New(self->groups-1);
2026 if (!result)
2027 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002028
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002029 for (index = 1; index < self->groups; index++) {
2030 PyObject* item;
2031 item = match_getslice_by_index(self, index, def);
2032 if (!item) {
2033 Py_DECREF(result);
2034 return NULL;
2035 }
2036 PyTuple_SET_ITEM(result, index-1, item);
2037 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002038
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002039 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002040}
2041
2042static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002043match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002044{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 PyObject* result;
2046 PyObject* keys;
2047 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002048
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002049 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002050 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002051 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002052 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002053
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 result = PyDict_New();
2055 if (!result || !self->pattern->groupindex)
2056 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002057
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002059 if (!keys)
2060 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002062 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002063 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002065 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002067 if (!key)
2068 goto failed;
2069 value = match_getslice(self, key, def);
2070 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002071 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002072 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002073 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002074 status = PyDict_SetItem(result, key, value);
2075 Py_DECREF(value);
2076 if (status < 0)
2077 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002078 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002079
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002080 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002081
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002083
2084failed:
2085 Py_DECREF(keys);
2086 Py_DECREF(result);
2087 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002088}
2089
2090static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002091match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002092{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002093 int index;
2094
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 PyObject* index_ = Py_False; /* zero */
2096 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2097 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002098
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002099 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002100
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002101 if (index < 0 || index >= self->groups) {
2102 PyErr_SetString(
2103 PyExc_IndexError,
2104 "no such group"
2105 );
2106 return NULL;
2107 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002108
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002109 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002110 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002111}
2112
2113static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002114match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002115{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002116 int index;
2117
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002118 PyObject* index_ = Py_False; /* zero */
2119 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2120 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002121
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002122 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002123
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002124 if (index < 0 || index >= self->groups) {
2125 PyErr_SetString(
2126 PyExc_IndexError,
2127 "no such group"
2128 );
2129 return NULL;
2130 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002131
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002132 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002133 return Py_BuildValue("i", self->mark[index*2+1]);
2134}
2135
2136LOCAL(PyObject*)
2137_pair(int i1, int i2)
2138{
2139 PyObject* pair;
2140 PyObject* item;
2141
2142 pair = PyTuple_New(2);
2143 if (!pair)
2144 return NULL;
2145
2146 item = PyInt_FromLong(i1);
2147 if (!item)
2148 goto error;
2149 PyTuple_SET_ITEM(pair, 0, item);
2150
2151 item = PyInt_FromLong(i2);
2152 if (!item)
2153 goto error;
2154 PyTuple_SET_ITEM(pair, 1, item);
2155
2156 return pair;
2157
2158 error:
2159 Py_DECREF(pair);
2160 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002161}
2162
2163static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002164match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002165{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002166 int index;
2167
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002168 PyObject* index_ = Py_False; /* zero */
2169 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2170 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002171
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002172 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002174 if (index < 0 || index >= self->groups) {
2175 PyErr_SetString(
2176 PyExc_IndexError,
2177 "no such group"
2178 );
2179 return NULL;
2180 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002181
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002182 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002183 return _pair(self->mark[index*2], self->mark[index*2+1]);
2184}
2185
2186static PyObject*
2187match_regs(MatchObject* self)
2188{
2189 PyObject* regs;
2190 PyObject* item;
2191 int index;
2192
2193 regs = PyTuple_New(self->groups);
2194 if (!regs)
2195 return NULL;
2196
2197 for (index = 0; index < self->groups; index++) {
2198 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2199 if (!item) {
2200 Py_DECREF(regs);
2201 return NULL;
2202 }
2203 PyTuple_SET_ITEM(regs, index, item);
2204 }
2205
2206 Py_INCREF(regs);
2207 self->regs = regs;
2208
2209 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002210}
2211
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002212static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002213 {"group", (PyCFunction) match_group, METH_VARARGS},
2214 {"start", (PyCFunction) match_start, METH_VARARGS},
2215 {"end", (PyCFunction) match_end, METH_VARARGS},
2216 {"span", (PyCFunction) match_span, METH_VARARGS},
2217 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2218 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2219 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002220 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002221};
2222
2223static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002224match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002225{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002226 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002227
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002228 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2229 if (res)
2230 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002232 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002233
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002234 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002235 if (self->lastindex >= 0)
2236 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002237 Py_INCREF(Py_None);
2238 return Py_None;
2239 }
2240
2241 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002242 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002243 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002244 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002245 );
2246 if (result)
2247 return result;
2248 PyErr_Clear();
2249 }
2250 Py_INCREF(Py_None);
2251 return Py_None;
2252 }
2253
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002254 if (!strcmp(name, "string")) {
2255 if (self->string) {
2256 Py_INCREF(self->string);
2257 return self->string;
2258 } else {
2259 Py_INCREF(Py_None);
2260 return Py_None;
2261 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002262 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002263
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002264 if (!strcmp(name, "regs")) {
2265 if (self->regs) {
2266 Py_INCREF(self->regs);
2267 return self->regs;
2268 } else
2269 return match_regs(self);
2270 }
2271
2272 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002273 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002274 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002275 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002276
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002277 if (!strcmp(name, "pos"))
2278 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002279
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002280 if (!strcmp(name, "endpos"))
2281 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002282
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002283 PyErr_SetString(PyExc_AttributeError, name);
2284 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002285}
2286
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2289
2290statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002291 PyObject_HEAD_INIT(NULL)
2292 0, "SRE_Match",
2293 sizeof(MatchObject), sizeof(int),
2294 (destructor)match_dealloc, /*tp_dealloc*/
2295 0, /*tp_print*/
2296 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002297};
2298
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002299/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002300/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002301
2302static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002303scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002304{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002305 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002306 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002307 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002308}
2309
2310static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002311scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002312{
2313 SRE_STATE* state = &self->state;
2314 PyObject* match;
2315 int status;
2316
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002317 state_reset(state);
2318
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002319 state->ptr = state->start;
2320
2321 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002322 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002323 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002324#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002325 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002326#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002327 }
2328
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002329 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002330 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002331
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002332 if (status == 0 || state->ptr == state->start)
2333 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002334 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002335 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002336
2337 return match;
2338}
2339
2340
2341static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002342scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002343{
2344 SRE_STATE* state = &self->state;
2345 PyObject* match;
2346 int status;
2347
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002348 state_reset(state);
2349
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002350 state->ptr = state->start;
2351
2352 if (state->charsize == 1) {
2353 status = sre_search(state, PatternObject_GetCode(self->pattern));
2354 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002355#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002356 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002357#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002358 }
2359
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002360 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002361 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002362
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002363 if (status == 0 || state->ptr == state->start)
2364 state->start = (void*) ((char*) state->ptr + state->charsize);
2365 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002366 state->start = state->ptr;
2367
2368 return match;
2369}
2370
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002371static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002372 {"match", (PyCFunction) scanner_match, 0},
2373 {"search", (PyCFunction) scanner_search, 0},
2374 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002375};
2376
2377static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002378scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002379{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002380 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002382 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2383 if (res)
2384 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002385
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002386 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002387
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002388 /* attributes */
2389 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002390 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002391 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002392 }
2393
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002394 PyErr_SetString(PyExc_AttributeError, name);
2395 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002396}
2397
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002398statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002399 PyObject_HEAD_INIT(NULL)
2400 0, "SRE_Scanner",
2401 sizeof(ScannerObject), 0,
2402 (destructor)scanner_dealloc, /*tp_dealloc*/
2403 0, /*tp_print*/
2404 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002405};
2406
Guido van Rossumb700df92000-03-31 14:59:30 +00002407static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002408 {"compile", _compile, 1},
2409 {"getcodesize", sre_codesize, 1},
2410 {"getlower", sre_getlower, 1},
2411 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002412};
2413
Tim Peters5687ffe2001-02-28 16:44:18 +00002414DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002415init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002416{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002417 PyObject* m;
2418 PyObject* d;
2419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002420 /* Patch object types */
2421 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002422 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002423
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002424 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002425 d = PyModule_GetDict(m);
2426
2427 PyDict_SetItemString(
2428 d, "MAGIC", (PyObject*) PyInt_FromLong(SRE_MAGIC)
2429 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002430}
2431
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002432#endif /* !defined(SRE_RECURSIVE) */