blob: 3d4054a8c7dac2ab849851f73d476d4c362d3afd [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of
 * 2000-06-30 fl  added fast search optimization
 * 2000-06-30 fl  added assert (lookahead) primitives, etc
 * 2000-07-02 fl  added charset optimizations, etc
 * 2000-07-03 fl  store code in pattern object, lookbehind, etc
 * 2000-07-08 fl  added regs attribute
 * 2000-07-21 fl  reset lastindex in scanner methods
 * 2000-08-01 fl  fixes for 1.6b1
 * 2000-08-03 fl  added recursion limit
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-08-08 fl  changed findall to return empty strings instead of None
 * 2000-08-27 fl  properly propagate memory errors
 * 2000-09-02 fl  return -1 instead of None for start/end/span
 * 2000-09-20 fl  added expand method
 * 2000-09-21 fl  don't use the buffer interface for unicode strings
 * 2000-10-03 fl  fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl  really fixed assert_not; reset groups in findall
 * 2000-12-21 fl  fixed memory leak in groupdict
 * 2001-01-02 fl  properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl  avoid recursion for MIN_UNTIL; fixed uppercase literal bug
 * 2001-01-16 fl  fixed memory leak in pattern destructor
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
40
41#ifndef SRE_RECURSIVE
42
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000043char copyright[] = " SRE 2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000044
45#include "Python.h"
46
47#include "sre.h"
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
Fredrik Lundh436c3d582000-06-29 08:58:44 +000051/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000052#if !defined(SRE_MODULE)
53#define SRE_MODULE "sre"
54#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000055
Guido van Rossumb700df92000-03-31 14:59:30 +000056/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000057#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000058
Fredrik Lundh436c3d582000-06-29 08:58:44 +000059#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000060/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000061#define HAVE_UNICODE
62#endif
63
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000065/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000066
Fredrik Lundh33accc12000-08-27 20:59:47 +000067/* prevent run-away recursion (bad patterns on long strings) */
68
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000069#if !defined(USE_STACKCHECK)
Fredrik Lundh33accc12000-08-27 20:59:47 +000070#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
71/* require smaller recursion limit for a number of 64-bit platforms:
72 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
73/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
74#define USE_RECURSION_LIMIT 7500
75#else
76#define USE_RECURSION_LIMIT 10000
77#endif
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000078#endif
Fredrik Lundh96ab4652000-08-03 16:29:50 +000079
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000081#define USE_FAST_SEARCH
82
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000083/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000084#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000085
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000086#if PY_VERSION_HEX < 0x01060000
87#define PyObject_DEL(op) PyMem_DEL((op))
88#endif
89
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000090/* -------------------------------------------------------------------- */
91
Fredrik Lundh80946112000-06-29 18:03:25 +000092#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000093#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000094#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000095/* fastest possible local call under MSVC */
96#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000097#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000098#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000099#else
100#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +0000101#endif
102
103/* error codes */
104#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000105#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000106#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +0000107#define SRE_ERROR_MEMORY -9 /* out of memory */
108
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000109#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000110#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000111#else
112#define TRACE(v)
113#endif
114
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000115/* -------------------------------------------------------------------- */
116/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000117
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000118/* default character predicates (run sre_chars.py to regenerate tables) */
119
120#define SRE_DIGIT_MASK 1
121#define SRE_SPACE_MASK 2
122#define SRE_LINEBREAK_MASK 4
123#define SRE_ALNUM_MASK 8
124#define SRE_WORD_MASK 16
125
126static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
1272, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
12925, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
13024, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
1310, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
13224, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
133
Fredrik Lundhb389df32000-06-29 12:48:37 +0000134static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
Fredrik Lundh436c3d582000-06-29 08:58:44 +000013510, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
13627, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
13744, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
13861, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
139108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
140122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
141106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
142120, 121, 122, 123, 124, 125, 126, 127 };
143
Fredrik Lundhb389df32000-06-29 12:48:37 +0000144static unsigned int sre_lower(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000145{
Fredrik Lundhb389df32000-06-29 12:48:37 +0000146 return ((ch) < 128 ? sre_char_lower[ch] : ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000147}
148
149#define SRE_IS_DIGIT(ch)\
150 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
151#define SRE_IS_SPACE(ch)\
152 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
153#define SRE_IS_LINEBREAK(ch)\
154 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
155#define SRE_IS_ALNUM(ch)\
156 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
157#define SRE_IS_WORD(ch)\
158 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000159
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000160/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000161
/* lowercase a character via the C library's tolower().  only code
   points below 256 are passed on; larger values are returned
   unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return tolower(ch);
}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000166#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
167#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
168#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
169#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
170#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
171
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000172/* unicode-specific character predicates */
173
174#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000175static unsigned int sre_lower_unicode(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176{
177 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
178}
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000179#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
180#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
181#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
Fredrik Lundh22d25462000-07-01 17:50:59 +0000182#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000183#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000184#endif
185
Guido van Rossumb700df92000-03-31 14:59:30 +0000186LOCAL(int)
187sre_category(SRE_CODE category, unsigned int ch)
188{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000189 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000190
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000191 case SRE_CATEGORY_DIGIT:
192 return SRE_IS_DIGIT(ch);
193 case SRE_CATEGORY_NOT_DIGIT:
194 return !SRE_IS_DIGIT(ch);
195 case SRE_CATEGORY_SPACE:
196 return SRE_IS_SPACE(ch);
197 case SRE_CATEGORY_NOT_SPACE:
198 return !SRE_IS_SPACE(ch);
199 case SRE_CATEGORY_WORD:
200 return SRE_IS_WORD(ch);
201 case SRE_CATEGORY_NOT_WORD:
202 return !SRE_IS_WORD(ch);
203 case SRE_CATEGORY_LINEBREAK:
204 return SRE_IS_LINEBREAK(ch);
205 case SRE_CATEGORY_NOT_LINEBREAK:
206 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000207
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000208 case SRE_CATEGORY_LOC_WORD:
209 return SRE_LOC_IS_WORD(ch);
210 case SRE_CATEGORY_LOC_NOT_WORD:
211 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000212
213#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000214 case SRE_CATEGORY_UNI_DIGIT:
215 return SRE_UNI_IS_DIGIT(ch);
216 case SRE_CATEGORY_UNI_NOT_DIGIT:
217 return !SRE_UNI_IS_DIGIT(ch);
218 case SRE_CATEGORY_UNI_SPACE:
219 return SRE_UNI_IS_SPACE(ch);
220 case SRE_CATEGORY_UNI_NOT_SPACE:
221 return !SRE_UNI_IS_SPACE(ch);
222 case SRE_CATEGORY_UNI_WORD:
223 return SRE_UNI_IS_WORD(ch);
224 case SRE_CATEGORY_UNI_NOT_WORD:
225 return !SRE_UNI_IS_WORD(ch);
226 case SRE_CATEGORY_UNI_LINEBREAK:
227 return SRE_UNI_IS_LINEBREAK(ch);
228 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
229 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000230#else
231 case SRE_CATEGORY_UNI_DIGIT:
232 return SRE_IS_DIGIT(ch);
233 case SRE_CATEGORY_UNI_NOT_DIGIT:
234 return !SRE_IS_DIGIT(ch);
235 case SRE_CATEGORY_UNI_SPACE:
236 return SRE_IS_SPACE(ch);
237 case SRE_CATEGORY_UNI_NOT_SPACE:
238 return !SRE_IS_SPACE(ch);
239 case SRE_CATEGORY_UNI_WORD:
240 return SRE_LOC_IS_WORD(ch);
241 case SRE_CATEGORY_UNI_NOT_WORD:
242 return !SRE_LOC_IS_WORD(ch);
243 case SRE_CATEGORY_UNI_LINEBREAK:
244 return SRE_IS_LINEBREAK(ch);
245 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
246 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000247#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000248 }
249 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000250}
251
252/* helpers */
253
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000254static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000255mark_fini(SRE_STATE* state)
256{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000257 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000258 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000259 state->mark_stack = NULL;
260 }
261 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000262}
263
264static int
265mark_save(SRE_STATE* state, int lo, int hi)
266{
267 void* stack;
268 int size;
269 int minsize, newsize;
270
271 if (hi <= lo)
272 return 0;
273
274 size = (hi - lo) + 1;
275
276 newsize = state->mark_stack_size;
277 minsize = state->mark_stack_base + size;
278
279 if (newsize < minsize) {
280 /* create new stack */
281 if (!newsize) {
282 newsize = 512;
283 if (newsize < minsize)
284 newsize = minsize;
285 TRACE(("allocate stack %d\n", newsize));
286 stack = malloc(sizeof(void*) * newsize);
287 } else {
288 /* grow the stack */
289 while (newsize < minsize)
290 newsize += newsize;
291 TRACE(("grow stack to %d\n", newsize));
292 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
293 }
294 if (!stack) {
295 mark_fini(state);
296 return SRE_ERROR_MEMORY;
297 }
298 state->mark_stack = stack;
299 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000300 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000301
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000302 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000303
304 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
305 size * sizeof(void*));
306
307 state->mark_stack_base += size;
308
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000309 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000310}
311
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000312static int
313mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000314{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000315 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000316
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000317 if (hi <= lo)
318 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000319
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000320 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000321
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000322 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000323
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000324 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000325
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000326 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
327 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000328
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000329 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000330}
331
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000332/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000333
334#define SRE_CHAR unsigned char
335#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000336#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000337#define SRE_CHARSET sre_charset
338#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000339#define SRE_MATCH sre_match
340#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000341
342#if defined(HAVE_UNICODE)
343
Guido van Rossumb700df92000-03-31 14:59:30 +0000344#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000345#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000346#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000347
Guido van Rossumb700df92000-03-31 14:59:30 +0000348#undef SRE_SEARCH
349#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000350#undef SRE_INFO
351#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000352#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000353#undef SRE_AT
354#undef SRE_CHAR
355
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000356/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000357
358#define SRE_CHAR Py_UNICODE
359#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000360#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000361#define SRE_CHARSET sre_ucharset
362#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000363#define SRE_MATCH sre_umatch
364#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000365#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000366
367#endif /* SRE_RECURSIVE */
368
369/* -------------------------------------------------------------------- */
370/* String matching engine */
371
372/* the following section is compiled twice, with different character
373 settings */
374
375LOCAL(int)
376SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
377{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000378 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000379
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000380 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000382 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000383
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000384 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000385 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000386 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000387
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000388 case SRE_AT_BEGINNING_LINE:
389 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000390 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000391
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000392 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000393 return (((void*) (ptr+1) == state->end &&
394 SRE_IS_LINEBREAK((int) ptr[0])) ||
395 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000396
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 case SRE_AT_END_LINE:
398 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000399 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000400
Fredrik Lundh770617b2001-01-14 15:06:11 +0000401 case SRE_AT_END_STRING:
402 return ((void*) ptr == state->end);
403
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 case SRE_AT_BOUNDARY:
405 if (state->beginning == state->end)
406 return 0;
407 that = ((void*) ptr > state->beginning) ?
408 SRE_IS_WORD((int) ptr[-1]) : 0;
409 this = ((void*) ptr < state->end) ?
410 SRE_IS_WORD((int) ptr[0]) : 0;
411 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 case SRE_AT_NON_BOUNDARY:
414 if (state->beginning == state->end)
415 return 0;
416 that = ((void*) ptr > state->beginning) ?
417 SRE_IS_WORD((int) ptr[-1]) : 0;
418 this = ((void*) ptr < state->end) ?
419 SRE_IS_WORD((int) ptr[0]) : 0;
420 return this == that;
421 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000423 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000424}
425
426LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000427SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000428{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000430
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000431 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 for (;;) {
434 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000435
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000436 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000437 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000438 if (ch == set[0])
439 return ok;
440 set++;
441 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000442
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000443 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000444 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000445 if (set[0] <= ch && ch <= set[1])
446 return ok;
447 set += 2;
448 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000449
Fredrik Lundh3562f112000-07-02 12:00:07 +0000450 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000451 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000452 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
453 return ok;
454 set += 16;
455 break;
456
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000457 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000458 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000459 if (sre_category(set[0], (int) ch))
460 return ok;
461 set += 1;
462 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000463
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000464 case SRE_OP_NEGATE:
465 ok = !ok;
466 break;
467
468 case SRE_OP_FAILURE:
469 return !ok;
470
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000471 default:
472 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000473 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000474 return 0;
475 }
476 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000477}
478
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000479LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
480
481LOCAL(int)
482SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
483{
484 SRE_CODE chr;
485 SRE_CHAR* ptr = state->ptr;
486 SRE_CHAR* end = state->end;
487 int i;
488
489 /* adjust end */
490 if (maxcount < end - ptr && maxcount != 65535)
491 end = ptr + maxcount;
492
493 switch (pattern[0]) {
494
495 case SRE_OP_ANY:
496 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000497 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000498 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
499 ptr++;
500 break;
501
502 case SRE_OP_ANY_ALL:
503 /* repeated dot wildcare. skip to the end of the target
504 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000505 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000506 ptr = end;
507 break;
508
509 case SRE_OP_LITERAL:
510 /* repeated literal */
511 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000512 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000513 while (ptr < end && (SRE_CODE) *ptr == chr)
514 ptr++;
515 break;
516
517 case SRE_OP_LITERAL_IGNORE:
518 /* repeated literal */
519 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000520 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000521 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
522 ptr++;
523 break;
524
525 case SRE_OP_NOT_LITERAL:
526 /* repeated non-literal */
527 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000528 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000529 while (ptr < end && (SRE_CODE) *ptr != chr)
530 ptr++;
531 break;
532
533 case SRE_OP_NOT_LITERAL_IGNORE:
534 /* repeated non-literal */
535 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000536 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000537 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
538 ptr++;
539 break;
540
541 case SRE_OP_IN:
542 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000543 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
544 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000545 ptr++;
546 break;
547
548 default:
549 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000550 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000551 while ((SRE_CHAR*) state->ptr < end) {
552 i = SRE_MATCH(state, pattern, level);
553 if (i < 0)
554 return i;
555 if (!i)
556 break;
557 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000558 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
559 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000560 return (SRE_CHAR*) state->ptr - ptr;
561 }
562
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000563 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000564 return ptr - (SRE_CHAR*) state->ptr;
565}
566
Fredrik Lundh33accc12000-08-27 20:59:47 +0000567#if 0 /* not used in this release */
Guido van Rossumb700df92000-03-31 14:59:30 +0000568LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000569SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
570{
571 /* check if an SRE_OP_INFO block matches at the current position.
572 returns the number of SRE_CODE objects to skip if successful, 0
573 if no match */
574
575 SRE_CHAR* end = state->end;
576 SRE_CHAR* ptr = state->ptr;
577 int i;
578
579 /* check minimal length */
580 if (pattern[3] && (end - ptr) < pattern[3])
581 return 0;
582
583 /* check known prefix */
584 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
585 /* <length> <skip> <prefix data> <overlap data> */
586 for (i = 0; i < pattern[5]; i++)
587 if ((SRE_CODE) ptr[i] != pattern[7 + i])
588 return 0;
589 return pattern[0] + 2 * pattern[6];
590 }
591 return pattern[0];
592}
Fredrik Lundh33accc12000-08-27 20:59:47 +0000593#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000594
595LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000596SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000597{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000598 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000599 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000601 SRE_CHAR* end = state->end;
602 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000603 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000604 SRE_REPEAT* rp;
605 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000606 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000607
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000608 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000609
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000610 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000611
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000612#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000613 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000614 return SRE_ERROR_RECURSION_LIMIT;
615#endif
616
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000617#if defined(USE_RECURSION_LIMIT)
618 if (level > USE_RECURSION_LIMIT)
619 return SRE_ERROR_RECURSION_LIMIT;
620#endif
621
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000622 if (pattern[0] == SRE_OP_INFO) {
623 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000624 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000625 if (pattern[3] && (end - ptr) < pattern[3]) {
626 TRACE(("reject (got %d chars, need %d)\n",
627 (end - ptr), pattern[3]));
628 return 0;
629 }
630 pattern += pattern[1] + 1;
631 }
632
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000633 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000634
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000635 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000636
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000637 case SRE_OP_FAILURE:
638 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000639 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000640 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000641
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000642 case SRE_OP_SUCCESS:
643 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000644 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000645 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000646 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000648 case SRE_OP_AT:
649 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000650 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000651 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000652 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000653 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000654 pattern++;
655 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000656
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000657 case SRE_OP_CATEGORY:
658 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000659 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000660 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000661 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000662 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000663 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000664 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000665 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000666
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000667 case SRE_OP_LITERAL:
668 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000669 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000670 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000671 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000672 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000673 pattern++;
674 ptr++;
675 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000676
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000677 case SRE_OP_NOT_LITERAL:
678 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000679 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000680 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000681 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000682 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000683 pattern++;
684 ptr++;
685 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000686
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000687 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000688 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000689 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000690 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000691 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
692 return 0;
693 ptr++;
694 break;
695
696 case SRE_OP_ANY_ALL:
697 /* match anything */
698 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000699 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000700 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000701 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000702 ptr++;
703 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000704
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000705 case SRE_OP_IN:
706 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000707 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000708 TRACE(("|%p|%p|IN\n", pattern, ptr));
709 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000710 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000711 pattern += pattern[0];
712 ptr++;
713 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000714
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000715 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000716 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000717 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000718 i = pattern[0];
719 {
720 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
721 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
722 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000723 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 while (p < e) {
725 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000726 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 p++; ptr++;
728 }
729 }
730 pattern++;
731 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000732
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000733 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000735 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 i = pattern[0];
737 {
738 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
739 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
740 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000741 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 while (p < e) {
743 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000744 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000745 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 p++; ptr++;
747 }
748 }
749 pattern++;
750 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000753 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000754 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000755 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000756 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 pattern++;
758 ptr++;
759 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000760
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000761 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000762 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000763 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000764 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000765 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000766 pattern++;
767 ptr++;
768 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000769
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000770 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000771 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000772 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000773 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000774 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000775 pattern += pattern[0];
776 ptr++;
777 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000778
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000779 case SRE_OP_MARK:
780 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000781 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000782 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000783 i = pattern[0];
784 if (i & 1)
785 state->lastindex = i/2 + 1;
786 if (i > state->lastmark)
787 state->lastmark = i;
788 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000789 pattern++;
790 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000791
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 case SRE_OP_JUMP:
793 case SRE_OP_INFO:
794 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000795 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000796 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000797 pattern += pattern[0];
798 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 case SRE_OP_ASSERT:
801 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000802 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000803 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000805 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000806 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000807 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000808 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000809 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000810 pattern += pattern[0];
811 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000812
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000813 case SRE_OP_ASSERT_NOT:
814 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000815 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000816 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000818 if (state->ptr >= state->beginning) {
819 i = SRE_MATCH(state, pattern + 2, level + 1);
820 if (i < 0)
821 return i;
822 if (i)
823 return 0;
824 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000825 pattern += pattern[0];
826 break;
827
828 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000829 /* alternation */
830 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000831 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000832 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000833 for (; pattern[0]; pattern += pattern[0]) {
834 if (pattern[1] == SRE_OP_LITERAL &&
835 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
836 continue;
837 if (pattern[1] == SRE_OP_IN &&
838 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
839 continue;
840 state->ptr = ptr;
841 i = SRE_MATCH(state, pattern + 1, level + 1);
842 if (i)
843 return i;
844 if (state->lastmark > lastmark) {
845 memset(
846 state->mark + lastmark + 1, 0,
847 (state->lastmark - lastmark) * sizeof(void*)
848 );
849 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000850 }
851 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000852 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000853
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000854 case SRE_OP_REPEAT_ONE:
855 /* match repeated sequence (maximizing regexp) */
856
857 /* this operator only works if the repeated item is
858 exactly one character wide, and we're not already
859 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000860 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000861
862 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
863
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000864 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000865 pattern[1], pattern[2]));
866
Fredrik Lundhe1869832000-08-01 22:47:49 +0000867 if (ptr + pattern[1] > end)
868 return 0; /* cannot match */
869
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000870 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000871
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000872 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
873 if (count < 0)
874 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000875
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000876 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000877
878 /* when we arrive here, count contains the number of
879 matches, and ptr points to the tail of the target
880 string. check if the rest of the pattern matches,
881 and backtrack if not. */
882
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000883 if (count < (int) pattern[1])
884 return 0;
885
886 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
887 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000888 state->ptr = ptr;
889 return 1;
890
891 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
892 /* tail starts with a literal. skip positions where
893 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000894 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000895 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000896 while (count >= (int) pattern[1] &&
897 (ptr >= end || *ptr != chr)) {
898 ptr--;
899 count--;
900 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000901 if (count < (int) pattern[1])
902 break;
903 state->ptr = ptr;
904 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000905 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000906 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000907 ptr--;
908 count--;
909 }
910
911 } else {
912 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000913 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000914 while (count >= (int) pattern[1]) {
915 state->ptr = ptr;
916 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000917 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000918 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000919 ptr--;
920 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000921 if (state->lastmark > lastmark) {
922 memset(
923 state->mark + lastmark + 1, 0,
924 (state->lastmark - lastmark) * sizeof(void*)
925 );
926 state->lastmark = lastmark;
927 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000928 }
929 }
930 return 0;
931
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000932 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000933 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +0000934 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000935 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000936 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000937 pattern[1], pattern[2]));
938
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000939 rep.count = -1;
940 rep.pattern = pattern;
941
942 /* install new repeat context */
943 rep.prev = state->repeat;
944 state->repeat = &rep;
945
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000946 state->ptr = ptr;
947 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000948
949 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000950
951 return i;
952
953 case SRE_OP_MAX_UNTIL:
954 /* maximizing repeat */
955 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
956
957 /* FIXME: we probably need to deal with zero-width
958 matches in here... */
959
960 rp = state->repeat;
961 if (!rp)
962 return SRE_ERROR_STATE;
963
964 state->ptr = ptr;
965
966 count = rp->count + 1;
967
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000968 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000969
970 if (count < rp->pattern[1]) {
971 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000972 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000973 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000974 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000975 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000976 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000977 rp->count = count - 1;
978 state->ptr = ptr;
979 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000980 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000981
982 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000983 /* we may have enough matches, but if we can
984 match another item, do so */
985 rp->count = count;
986 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000987 i = mark_save(state, 0, lastmark);
988 if (i < 0)
989 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000990 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000991 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000992 if (i)
993 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000994 i = mark_restore(state, 0, lastmark);
995 if (i < 0)
996 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000997 rp->count = count - 1;
998 state->ptr = ptr;
999 }
1000
1001 /* cannot match more repeated items here. make sure the
1002 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001003 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001004 i = SRE_MATCH(state, pattern, level + 1);
1005 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001006 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001007 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001008 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001009 return 0;
1010
1011 case SRE_OP_MIN_UNTIL:
1012 /* minimizing repeat */
1013 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1014
1015 rp = state->repeat;
1016 if (!rp)
1017 return SRE_ERROR_STATE;
1018
1019 count = rp->count + 1;
1020
Fredrik Lundh770617b2001-01-14 15:06:11 +00001021 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1022 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001023
1024 state->ptr = ptr;
1025
1026 if (count < rp->pattern[1]) {
1027 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001028 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001029 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001030 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001031 if (i)
1032 return i;
1033 rp->count = count-1;
1034 state->ptr = ptr;
1035 return 0;
1036 }
1037
1038 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001039 state->repeat = rp->prev;
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001040 if (rp->pattern[2] == 65535) {
1041 /* unbounded repeat */
1042 for (;;) {
1043 i = SRE_MATCH(state, pattern, level + 1);
1044 if (i || ptr >= end)
1045 break;
1046 state->ptr = ++ptr;
1047 }
1048 } else
1049 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001050 if (i) {
1051 /* free(rp); */
1052 return i;
1053 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001054
Fredrik Lundh770617b2001-01-14 15:06:11 +00001055 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001056 state->repeat = rp;
1057
1058 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1059 return 0;
1060
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001061 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001062 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001063 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001064 if (i)
1065 return i;
1066 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001067 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001068 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001069
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001070 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001071 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001072 return SRE_ERROR_ILLEGAL;
1073 }
1074 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001075
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001076 /* shouldn't end up here */
1077 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001078}
1079
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001080LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001081SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1082{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001083 SRE_CHAR* ptr = state->start;
1084 SRE_CHAR* end = state->end;
1085 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001086 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001087 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001088 SRE_CODE* prefix = NULL;
1089 SRE_CODE* charset = NULL;
1090 SRE_CODE* overlap = NULL;
1091 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001092
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001093 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001094 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001095 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001096
1097 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001098
1099 if (pattern[3] > 0) {
1100 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001101 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001102 end -= pattern[3]-1;
1103 if (end <= ptr)
1104 end = ptr+1;
1105 }
1106
Fredrik Lundh3562f112000-07-02 12:00:07 +00001107 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001108 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001109 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001110 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001111 prefix_skip = pattern[6];
1112 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001113 overlap = prefix + prefix_len - 1;
1114 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001115 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001116 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001117 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001118
1119 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001120 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001121
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001122 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1123 TRACE(("charset = %p\n", charset));
1124
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001125#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001126 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001127 /* pattern starts with a known prefix. use the overlap
1128 table to skip forward as fast as we possibly can */
1129 int i = 0;
1130 end = state->end;
1131 while (ptr < end) {
1132 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001133 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001134 if (!i)
1135 break;
1136 else
1137 i = overlap[i];
1138 } else {
1139 if (++i == prefix_len) {
1140 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001141 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1142 state->start = ptr + 1 - prefix_len;
1143 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001144 if (flags & SRE_INFO_LITERAL)
1145 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001146 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001147 if (status != 0)
1148 return status;
1149 /* close but no cigar -- try again */
1150 i = overlap[i];
1151 }
1152 break;
1153 }
1154
1155 }
1156 ptr++;
1157 }
1158 return 0;
1159 }
1160#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001161
Fredrik Lundh3562f112000-07-02 12:00:07 +00001162 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001163 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001164 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001165 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001166 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001167 for (;;) {
1168 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1169 ptr++;
1170 if (ptr == end)
1171 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001172 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001173 state->start = ptr;
1174 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001175 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001176 if (status != 0)
1177 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001178 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001179 } else if (charset) {
1180 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001181 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001182 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001183 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001184 ptr++;
1185 if (ptr == end)
1186 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001187 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001188 state->start = ptr;
1189 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001190 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001191 if (status != 0)
1192 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001193 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001194 }
1195 } else
1196 /* general case */
1197 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001198 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001199 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001200 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001201 if (status != 0)
1202 break;
1203 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001204
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001205 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001206}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001207
Guido van Rossumb700df92000-03-31 14:59:30 +00001208
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001209#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001210
1211/* -------------------------------------------------------------------- */
1212/* factories and destructors */
1213
1214/* see sre.h for object declarations */
1215
1216staticforward PyTypeObject Pattern_Type;
1217staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001218staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001219
1220static PyObject *
1221_compile(PyObject* self_, PyObject* args)
1222{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001223 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001224
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001225 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001226 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001227
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001228 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001229 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001230 PyObject* code;
1231 int groups = 0;
1232 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001233 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001234 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1235 &PyList_Type, &code, &groups,
1236 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001237 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001238
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001239 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001240
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001241 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001242 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001243 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001244
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001245 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001246 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001247 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001248 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001249
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001250 if (PyErr_Occurred()) {
1251 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001252 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001253 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001254
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001255 Py_INCREF(pattern);
1256 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001257
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001258 self->flags = flags;
1259
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001260 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001261
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001262 Py_XINCREF(groupindex);
1263 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001264
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001265 Py_XINCREF(indexgroup);
1266 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001267
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001268 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001269}
1270
1271static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001272sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001273{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001274 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001275}
1276
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001277static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001278sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001279{
1280 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001281 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001282 return NULL;
1283 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001284 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001285 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001286#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001287 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001288#else
1289 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001290#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001291 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001292}
1293
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001294LOCAL(void)
1295state_reset(SRE_STATE* state)
1296{
1297 int i;
1298
1299 state->lastmark = 0;
1300
1301 /* FIXME: dynamic! */
1302 for (i = 0; i < SRE_MARK_SIZE; i++)
1303 state->mark[i] = NULL;
1304
1305 state->lastindex = -1;
1306
1307 state->repeat = NULL;
1308
1309 mark_fini(state);
1310}
1311
Guido van Rossumb700df92000-03-31 14:59:30 +00001312LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001313state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1314 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001315{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001316 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001317
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001318 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001319 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001320 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001321
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001322 memset(state, 0, sizeof(SRE_STATE));
1323
1324 state->lastindex = -1;
1325
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001326#if defined(HAVE_UNICODE)
1327 if (PyUnicode_Check(string)) {
1328 /* unicode strings doesn't always support the buffer interface */
1329 ptr = (void*) PyUnicode_AS_DATA(string);
1330 bytes = PyUnicode_GET_DATA_SIZE(string);
1331 size = PyUnicode_GET_SIZE(string);
1332 state->charsize = sizeof(Py_UNICODE);
1333
1334 } else {
1335#endif
1336
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001337 /* get pointer to string buffer */
1338 buffer = string->ob_type->tp_as_buffer;
1339 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1340 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001341 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001342 return NULL;
1343 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001344
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001345 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001346 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1347 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001348 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1349 return NULL;
1350 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001352 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001353#if PY_VERSION_HEX >= 0x01060000
1354 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001355#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001356 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001357#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001358
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001359 if (PyString_Check(string) || bytes == size)
1360 state->charsize = 1;
1361#if defined(HAVE_UNICODE)
1362 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1363 state->charsize = sizeof(Py_UNICODE);
1364#endif
1365 else {
1366 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1367 return NULL;
1368 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001369
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001370#if defined(HAVE_UNICODE)
1371 }
1372#endif
1373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001374 /* adjust boundaries */
1375 if (start < 0)
1376 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001377 else if (start > size)
1378 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001379
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001380 if (end < 0)
1381 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001382 else if (end > size)
1383 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001385 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001387 state->start = (void*) ((char*) ptr + start * state->charsize);
1388 state->end = (void*) ((char*) ptr + end * state->charsize);
1389
1390 Py_INCREF(string);
1391 state->string = string;
1392 state->pos = start;
1393 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001394
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001395 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001396 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001397 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001398#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001399 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001400#else
1401 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001402#endif
1403 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001404 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001405
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001406 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001407}
1408
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001409LOCAL(void)
1410state_fini(SRE_STATE* state)
1411{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001412 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001413 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001414}
1415
1416LOCAL(PyObject*)
1417state_getslice(SRE_STATE* state, int index, PyObject* string)
1418{
Fredrik Lundh58100642000-08-09 09:14:35 +00001419 int i, j;
1420
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001421 index = (index - 1) * 2;
1422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001423 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001424 i = j = 0;
1425 } else {
1426 i = ((char*)state->mark[index] - (char*)state->beginning) /
1427 state->charsize;
1428 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1429 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001430 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001431
Fredrik Lundh58100642000-08-09 09:14:35 +00001432 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001433}
1434
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001435static void
1436pattern_error(int status)
1437{
1438 switch (status) {
1439 case SRE_ERROR_RECURSION_LIMIT:
1440 PyErr_SetString(
1441 PyExc_RuntimeError,
1442 "maximum recursion limit exceeded"
1443 );
1444 break;
1445 case SRE_ERROR_MEMORY:
1446 PyErr_NoMemory();
1447 break;
1448 default:
1449 /* other error codes indicate compiler/engine bugs */
1450 PyErr_SetString(
1451 PyExc_RuntimeError,
1452 "internal error in regular expression engine"
1453 );
1454 }
1455}
1456
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001457static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001458pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001459{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001460 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001461
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001462 MatchObject* match;
1463 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001464 char* base;
1465 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001466
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001467 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001468
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001469 /* create match object (with room for extra group marks) */
1470 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001471 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001472 if (!match)
1473 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001474
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001475 Py_INCREF(pattern);
1476 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001477
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001478 Py_INCREF(state->string);
1479 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001480
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001481 match->regs = NULL;
1482 match->groups = pattern->groups+1;
1483
1484 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001485
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001486 base = (char*) state->beginning;
1487 n = state->charsize;
1488
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001489 match->mark[0] = ((char*) state->start - base) / n;
1490 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001491
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001492 for (i = j = 0; i < pattern->groups; i++, j+=2)
1493 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1494 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1495 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1496 } else
1497 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1498
1499 match->pos = state->pos;
1500 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001501
Fredrik Lundh6f013982000-07-03 18:44:21 +00001502 match->lastindex = state->lastindex;
1503
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001504 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001505
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001506 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001507
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001508 /* no match */
1509 Py_INCREF(Py_None);
1510 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001511
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001512 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001513
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001514 /* internal error */
1515 pattern_error(status);
1516 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001517}
1518
1519static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001520pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001521{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001522 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001523
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001524 ScannerObject* self;
1525
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001526 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001527 int start = 0;
1528 int end = INT_MAX;
1529 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1530 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001531
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001532 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001533 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001534 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001535 return NULL;
1536
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001537 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001538 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001539 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001540 return NULL;
1541 }
1542
1543 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001544 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001546 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001547}
1548
Guido van Rossumb700df92000-03-31 14:59:30 +00001549static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001550pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001551{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001552 Py_XDECREF(self->pattern);
1553 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001554 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001555 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001556}
1557
1558static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001559pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001560{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001561 SRE_STATE state;
1562 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001563
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001564 PyObject* string;
1565 int start = 0;
1566 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001567 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1568 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1569 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001570 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001571
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 string = state_init(&state, self, string, start, end);
1573 if (!string)
1574 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001575
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001576 state.ptr = state.start;
1577
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001578 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1579
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001580 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001581 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001582 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001583#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001584 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001585#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001586 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001587
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001588 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1589
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001590 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001591
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001592 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001593}
1594
1595static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001596pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001597{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001598 SRE_STATE state;
1599 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001601 PyObject* string;
1602 int start = 0;
1603 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001604 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1605 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1606 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001607 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001608
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001609 string = state_init(&state, self, string, start, end);
1610 if (!string)
1611 return NULL;
1612
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001613 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1614
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001615 if (state.charsize == 1) {
1616 status = sre_search(&state, PatternObject_GetCode(self));
1617 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001618#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001619 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001620#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001621 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001622
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001623 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1624
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001625 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001626
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001627 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001628}
1629
1630static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001631call(char* function, PyObject* args)
1632{
1633 PyObject* name;
1634 PyObject* module;
1635 PyObject* func;
1636 PyObject* result;
1637
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001638 name = PyString_FromString(SRE_MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001639 if (!name)
1640 return NULL;
1641 module = PyImport_Import(name);
1642 Py_DECREF(name);
1643 if (!module)
1644 return NULL;
1645 func = PyObject_GetAttrString(module, function);
1646 Py_DECREF(module);
1647 if (!func)
1648 return NULL;
1649 result = PyObject_CallObject(func, args);
1650 Py_DECREF(func);
1651 Py_DECREF(args);
1652 return result;
1653}
1654
1655static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001656pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001657{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001658 PyObject* template;
1659 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001660 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001661 static char* kwlist[] = { "repl", "string", "count", NULL };
1662 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1663 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001664 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001665
1666 /* delegate to Python code */
1667 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1668}
1669
1670static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001671pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001672{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001673 PyObject* template;
1674 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001675 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001676 static char* kwlist[] = { "repl", "string", "count", NULL };
1677 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1678 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001679 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001680
1681 /* delegate to Python code */
1682 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1683}
1684
1685static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001686pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001687{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001688 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001689 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001690 static char* kwlist[] = { "source", "maxsplit", NULL };
1691 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1692 &string, &maxsplit))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001694
1695 /* delegate to Python code */
1696 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1697}
1698
1699static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001700pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001701{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 SRE_STATE state;
1703 PyObject* list;
1704 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001705 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001706
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001707 PyObject* string;
1708 int start = 0;
1709 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001710 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1711 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1712 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001713 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001714
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001715 string = state_init(&state, self, string, start, end);
1716 if (!string)
1717 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001718
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001719 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001720
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001721 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001722
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001723 PyObject* item;
1724
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001725 state_reset(&state);
1726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001727 state.ptr = state.start;
1728
1729 if (state.charsize == 1) {
1730 status = sre_search(&state, PatternObject_GetCode(self));
1731 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001732#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001733 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001734#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001735 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001736
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001737 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001738
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001739 /* don't bother to build a match object */
1740 switch (self->groups) {
1741 case 0:
1742 item = PySequence_GetSlice(
1743 string,
1744 ((char*) state.start - (char*) state.beginning) /
1745 state.charsize,
1746 ((char*) state.ptr - (char*) state.beginning) /
1747 state.charsize);
1748 if (!item)
1749 goto error;
1750 break;
1751 case 1:
1752 item = state_getslice(&state, 1, string);
1753 if (!item)
1754 goto error;
1755 break;
1756 default:
1757 item = PyTuple_New(self->groups);
1758 if (!item)
1759 goto error;
1760 for (i = 0; i < self->groups; i++) {
1761 PyObject* o = state_getslice(&state, i+1, string);
1762 if (!o) {
1763 Py_DECREF(item);
1764 goto error;
1765 }
1766 PyTuple_SET_ITEM(item, i, o);
1767 }
1768 break;
1769 }
1770
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001771 status = PyList_Append(list, item);
1772 Py_DECREF(item);
1773
1774 if (status < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001775 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001776
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001777 if (state.ptr == state.start)
1778 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001779 else
1780 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001781
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001782 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001783
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001784 if (status == 0)
1785 break;
1786
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001787 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001788 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001789
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001790 }
1791 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001792
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001793 state_fini(&state);
1794 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001795
1796error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001797 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001798 state_fini(&state);
1799 return NULL;
1800
Guido van Rossumb700df92000-03-31 14:59:30 +00001801}
1802
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001803static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00001804 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1805 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1806 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1807 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1808 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1809 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001810 /* experimental */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001811 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001812 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001813};
1814
1815static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001816pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001817{
1818 PyObject* res;
1819
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001820 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001821
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001822 if (res)
1823 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001824
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001825 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001826
1827 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001828 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001829 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001830 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001831 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001832
1833 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001834 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001835
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001836 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001837 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001838
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001839 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001840 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001841 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001842 }
1843
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001844 PyErr_SetString(PyExc_AttributeError, name);
1845 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001846}
1847
1848statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001849 PyObject_HEAD_INIT(NULL)
1850 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001851 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001852 (destructor)pattern_dealloc, /*tp_dealloc*/
1853 0, /*tp_print*/
1854 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001855};
1856
1857/* -------------------------------------------------------------------- */
1858/* match methods */
1859
1860static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001861match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001862{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001863 Py_XDECREF(self->regs);
1864 Py_XDECREF(self->string);
1865 Py_DECREF(self->pattern);
1866 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001867}
1868
1869static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001870match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001871{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 if (index < 0 || index >= self->groups) {
1873 /* raise IndexError if we were given a bad group number */
1874 PyErr_SetString(
1875 PyExc_IndexError,
1876 "no such group"
1877 );
1878 return NULL;
1879 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001880
Fredrik Lundh6f013982000-07-03 18:44:21 +00001881 index *= 2;
1882
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001883 if (self->string == Py_None || self->mark[index] < 0) {
1884 /* return default value if the string or group is undefined */
1885 Py_INCREF(def);
1886 return def;
1887 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001888
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001889 return PySequence_GetSlice(
1890 self->string, self->mark[index], self->mark[index+1]
1891 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001892}
1893
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001894static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001895match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001896{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001897 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001898
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001899 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001900 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001901
Fredrik Lundh6f013982000-07-03 18:44:21 +00001902 i = -1;
1903
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001904 if (self->pattern->groupindex) {
1905 index = PyObject_GetItem(self->pattern->groupindex, index);
1906 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001907 if (PyInt_Check(index))
1908 i = (int) PyInt_AS_LONG(index);
1909 Py_DECREF(index);
1910 } else
1911 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001912 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001913
1914 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001915}
1916
1917static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001918match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001919{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001920 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001921}
1922
1923static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001924match_expand(MatchObject* self, PyObject* args)
1925{
1926 PyObject* template;
1927 if (!PyArg_ParseTuple(args, "O:expand", &template))
1928 return NULL;
1929
1930 /* delegate to Python code */
1931 return call(
1932 "_expand",
1933 Py_BuildValue("OOO", self->pattern, self, template)
1934 );
1935}
1936
1937static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001938match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001939{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001940 PyObject* result;
1941 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001942
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001943 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001944
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001945 switch (size) {
1946 case 0:
1947 result = match_getslice(self, Py_False, Py_None);
1948 break;
1949 case 1:
1950 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1951 break;
1952 default:
1953 /* fetch multiple items */
1954 result = PyTuple_New(size);
1955 if (!result)
1956 return NULL;
1957 for (i = 0; i < size; i++) {
1958 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001959 self, PyTuple_GET_ITEM(args, i), Py_None
1960 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001961 if (!item) {
1962 Py_DECREF(result);
1963 return NULL;
1964 }
1965 PyTuple_SET_ITEM(result, i, item);
1966 }
1967 break;
1968 }
1969 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001970}
1971
1972static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001973match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001974{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001975 PyObject* result;
1976 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001977
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001978 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001979 static char* kwlist[] = { "default", NULL };
1980 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001981 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001982
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001983 result = PyTuple_New(self->groups-1);
1984 if (!result)
1985 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001986
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001987 for (index = 1; index < self->groups; index++) {
1988 PyObject* item;
1989 item = match_getslice_by_index(self, index, def);
1990 if (!item) {
1991 Py_DECREF(result);
1992 return NULL;
1993 }
1994 PyTuple_SET_ITEM(result, index-1, item);
1995 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001996
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001997 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001998}
1999
2000static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002001match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002002{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002003 PyObject* result;
2004 PyObject* keys;
2005 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002006
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002007 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002008 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002009 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002010 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002011
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002012 result = PyDict_New();
2013 if (!result || !self->pattern->groupindex)
2014 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002015
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002016 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002017 if (!keys)
2018 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002019
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002020 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002021 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002022 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002023 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002024 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002025 if (!key)
2026 goto failed;
2027 value = match_getslice(self, key, def);
2028 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002029 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002030 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002031 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002032 status = PyDict_SetItem(result, key, value);
2033 Py_DECREF(value);
2034 if (status < 0)
2035 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002036 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002037
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002038 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002039
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002040 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002041
2042failed:
2043 Py_DECREF(keys);
2044 Py_DECREF(result);
2045 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002046}
2047
2048static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002049match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002050{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002051 int index;
2052
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002053 PyObject* index_ = Py_False; /* zero */
2054 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2055 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002056
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002057 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002058
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002059 if (index < 0 || index >= self->groups) {
2060 PyErr_SetString(
2061 PyExc_IndexError,
2062 "no such group"
2063 );
2064 return NULL;
2065 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002066
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002067 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002068 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002069}
2070
2071static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002072match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002073{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002074 int index;
2075
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002076 PyObject* index_ = Py_False; /* zero */
2077 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2078 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002079
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002080 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002081
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 if (index < 0 || index >= self->groups) {
2083 PyErr_SetString(
2084 PyExc_IndexError,
2085 "no such group"
2086 );
2087 return NULL;
2088 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002089
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002090 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002091 return Py_BuildValue("i", self->mark[index*2+1]);
2092}
2093
2094LOCAL(PyObject*)
2095_pair(int i1, int i2)
2096{
2097 PyObject* pair;
2098 PyObject* item;
2099
2100 pair = PyTuple_New(2);
2101 if (!pair)
2102 return NULL;
2103
2104 item = PyInt_FromLong(i1);
2105 if (!item)
2106 goto error;
2107 PyTuple_SET_ITEM(pair, 0, item);
2108
2109 item = PyInt_FromLong(i2);
2110 if (!item)
2111 goto error;
2112 PyTuple_SET_ITEM(pair, 1, item);
2113
2114 return pair;
2115
2116 error:
2117 Py_DECREF(pair);
2118 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002119}
2120
2121static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002122match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002123{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002124 int index;
2125
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002126 PyObject* index_ = Py_False; /* zero */
2127 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2128 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002129
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002130 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002131
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002132 if (index < 0 || index >= self->groups) {
2133 PyErr_SetString(
2134 PyExc_IndexError,
2135 "no such group"
2136 );
2137 return NULL;
2138 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002139
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002140 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 return _pair(self->mark[index*2], self->mark[index*2+1]);
2142}
2143
2144static PyObject*
2145match_regs(MatchObject* self)
2146{
2147 PyObject* regs;
2148 PyObject* item;
2149 int index;
2150
2151 regs = PyTuple_New(self->groups);
2152 if (!regs)
2153 return NULL;
2154
2155 for (index = 0; index < self->groups; index++) {
2156 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2157 if (!item) {
2158 Py_DECREF(regs);
2159 return NULL;
2160 }
2161 PyTuple_SET_ITEM(regs, index, item);
2162 }
2163
2164 Py_INCREF(regs);
2165 self->regs = regs;
2166
2167 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002168}
2169
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002170static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002171 {"group", (PyCFunction) match_group, METH_VARARGS},
2172 {"start", (PyCFunction) match_start, METH_VARARGS},
2173 {"end", (PyCFunction) match_end, METH_VARARGS},
2174 {"span", (PyCFunction) match_span, METH_VARARGS},
2175 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2176 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2177 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002178 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002179};
2180
2181static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002182match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002183{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002184 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002185
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002186 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2187 if (res)
2188 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002189
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002190 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002191
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002192 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002193 if (self->lastindex >= 0)
2194 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002195 Py_INCREF(Py_None);
2196 return Py_None;
2197 }
2198
2199 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002200 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002201 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002202 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002203 );
2204 if (result)
2205 return result;
2206 PyErr_Clear();
2207 }
2208 Py_INCREF(Py_None);
2209 return Py_None;
2210 }
2211
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002212 if (!strcmp(name, "string")) {
2213 if (self->string) {
2214 Py_INCREF(self->string);
2215 return self->string;
2216 } else {
2217 Py_INCREF(Py_None);
2218 return Py_None;
2219 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002220 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002221
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002222 if (!strcmp(name, "regs")) {
2223 if (self->regs) {
2224 Py_INCREF(self->regs);
2225 return self->regs;
2226 } else
2227 return match_regs(self);
2228 }
2229
2230 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002231 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002232 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002233 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002234
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002235 if (!strcmp(name, "pos"))
2236 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002237
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002238 if (!strcmp(name, "endpos"))
2239 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002240
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002241 PyErr_SetString(PyExc_AttributeError, name);
2242 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002243}
2244
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2247
2248statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002249 PyObject_HEAD_INIT(NULL)
2250 0, "SRE_Match",
2251 sizeof(MatchObject), sizeof(int),
2252 (destructor)match_dealloc, /*tp_dealloc*/
2253 0, /*tp_print*/
2254 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002255};
2256
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002257/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002258/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002259
2260static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002261scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002262{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002263 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002264 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002265 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002266}
2267
2268static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002269scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002270{
2271 SRE_STATE* state = &self->state;
2272 PyObject* match;
2273 int status;
2274
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002275 state_reset(state);
2276
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002277 state->ptr = state->start;
2278
2279 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002280 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002281 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002282#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002283 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002284#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002285 }
2286
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002287 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002288 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002289
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002290 if (status == 0 || state->ptr == state->start)
2291 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002292 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002293 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002294
2295 return match;
2296}
2297
2298
2299static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002300scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002301{
2302 SRE_STATE* state = &self->state;
2303 PyObject* match;
2304 int status;
2305
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002306 state_reset(state);
2307
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002308 state->ptr = state->start;
2309
2310 if (state->charsize == 1) {
2311 status = sre_search(state, PatternObject_GetCode(self->pattern));
2312 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002313#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002314 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002315#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002316 }
2317
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002318 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002319 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002320
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002321 if (status == 0 || state->ptr == state->start)
2322 state->start = (void*) ((char*) state->ptr + state->charsize);
2323 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002324 state->start = state->ptr;
2325
2326 return match;
2327}
2328
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002329static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002330 {"match", (PyCFunction) scanner_match, 0},
2331 {"search", (PyCFunction) scanner_search, 0},
2332 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002333};
2334
2335static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002336scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002337{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002338 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002339
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002340 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2341 if (res)
2342 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002343
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002344 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002345
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002346 /* attributes */
2347 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002348 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002349 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002350 }
2351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002352 PyErr_SetString(PyExc_AttributeError, name);
2353 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002354}
2355
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002356statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002357 PyObject_HEAD_INIT(NULL)
2358 0, "SRE_Scanner",
2359 sizeof(ScannerObject), 0,
2360 (destructor)scanner_dealloc, /*tp_dealloc*/
2361 0, /*tp_print*/
2362 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002363};
2364
Guido van Rossumb700df92000-03-31 14:59:30 +00002365static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002366 {"compile", _compile, 1},
2367 {"getcodesize", sre_codesize, 1},
2368 {"getlower", sre_getlower, 1},
2369 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002370};
2371
2372void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002373#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002374__declspec(dllexport)
2375#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002376init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002377{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002378 PyObject* m;
2379 PyObject* d;
2380
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002381 /* Patch object types */
2382 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002383 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002384
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002385 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002386 d = PyModule_GetDict(m);
2387
2388 PyDict_SetItemString(
2389 d, "MAGIC", (PyObject*) PyInt_FromLong(SRE_MAGIC)
2390 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002391}
2392
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002393#endif /* !defined(SRE_RECURSIVE) */