/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of
 * 2000-06-30 fl  added fast search optimization
 * 2000-06-30 fl  added assert (lookahead) primitives, etc
 * 2000-07-02 fl  added charset optimizations, etc
 * 2000-07-03 fl  store code in pattern object, lookbehind, etc
 * 2000-07-08 fl  added regs attribute
 * 2000-07-21 fl  reset lastindex in scanner methods
 * 2000-08-01 fl  fixes for 1.6b1
 * 2000-08-03 fl  added recursion limit
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-08-08 fl  changed findall to return empty strings instead of None
 * 2000-08-27 fl  properly propagate memory errors
 * 2000-09-02 fl  return -1 instead of None for start/end/span
 * 2000-09-20 fl  added expand method
 * 2000-09-21 fl  don't use the buffer interface for unicode strings
 * 2000-10-03 fl  fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl  really fixed assert_not; reset groups in findall
 * 2000-12-21 fl  fixed memory leak in groupdict
 * 2001-01-02 fl  properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl  avoid recursion for MIN_UNTIL; fixed uppercase literal bug
 * 2001-01-16 fl  fixed memory leak in pattern destructor
 * 2001-03-20 fl  lots of fixes for 2.1b2
 * 2001-04-15 fl  export copyright as Python attribute, not global
 * 2001-04-28 fl  added __copy__ methods (work in progress)
 * 2001-05-14 fl  fixes for 1.5.2
 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl  added sub/subn primitive
 * 2001-10-22 fl  check for literal sub/subn templates
 * 2001-10-24 fl  added finditer primitive (for 2.2 only)
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
50
51#ifndef SRE_RECURSIVE
52
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000053static char copyright[] =
Fredrik Lundhbec95b92001-10-21 16:47:57 +000054 " SRE 2.2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000055
56#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000057#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000058
59#include "sre.h"
60
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000061#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000062
Fredrik Lundh436c3d582000-06-29 08:58:44 +000063/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000064#if !defined(SRE_MODULE)
65#define SRE_MODULE "sre"
66#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000067
Guido van Rossumb700df92000-03-31 14:59:30 +000068/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000069#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000070
Fredrik Lundh971e78b2001-10-20 17:48:46 +000071#if PY_VERSION_HEX >= 0x01060000
72#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
Fredrik Lundh22d25462000-07-01 17:50:59 +000073/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000074#define HAVE_UNICODE
75#endif
Fredrik Lundh971e78b2001-10-20 17:48:46 +000076#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000077
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000078/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000079/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080
Fredrik Lundh33accc12000-08-27 20:59:47 +000081/* prevent run-away recursion (bad patterns on long strings) */
82
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000083#if !defined(USE_STACKCHECK)
Fredrik Lundh33accc12000-08-27 20:59:47 +000084#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
85/* require smaller recursion limit for a number of 64-bit platforms:
86 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
87/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
88#define USE_RECURSION_LIMIT 7500
89#else
90#define USE_RECURSION_LIMIT 10000
91#endif
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000092#endif
Fredrik Lundh96ab4652000-08-03 16:29:50 +000093
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000094/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000095#define USE_FAST_SEARCH
96
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000097/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000098#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000099
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000100/* enables copy/deepcopy handling (work in progress) */
101#undef USE_BUILTIN_COPY
102
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000103#if PY_VERSION_HEX < 0x01060000
104#define PyObject_DEL(op) PyMem_DEL((op))
105#endif
106
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000107/* -------------------------------------------------------------------- */
108
Fredrik Lundh80946112000-06-29 18:03:25 +0000109#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +0000110#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +0000111#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +0000112/* fastest possible local call under MSVC */
113#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000114#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000115#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000116#else
117#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +0000118#endif
119
120/* error codes */
121#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000122#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000123#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +0000124#define SRE_ERROR_MEMORY -9 /* out of memory */
125
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000126#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000127#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000128#else
129#define TRACE(v)
130#endif
131
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000132/* -------------------------------------------------------------------- */
133/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000134
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000135/* default character predicates (run sre_chars.py to regenerate tables) */
136
137#define SRE_DIGIT_MASK 1
138#define SRE_SPACE_MASK 2
139#define SRE_LINEBREAK_MASK 4
140#define SRE_ALNUM_MASK 8
141#define SRE_WORD_MASK 16
142
Fredrik Lundh21009b92001-09-18 18:47:09 +0000143/* FIXME: this assumes ASCII. create tables in init_sre() instead */
144
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000145static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
1462, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
1470, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
14825, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
14924, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
1500, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
15124, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
152
Fredrik Lundhb389df32000-06-29 12:48:37 +0000153static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
Fredrik Lundh436c3d582000-06-29 08:58:44 +000015410, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
15527, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
15644, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
15761, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
158108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
159122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
160106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
161120, 121, 122, 123, 124, 125, 126, 127 };
162
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000163#define SRE_IS_DIGIT(ch)\
164 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
165#define SRE_IS_SPACE(ch)\
166 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
167#define SRE_IS_LINEBREAK(ch)\
168 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
169#define SRE_IS_ALNUM(ch)\
170 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
171#define SRE_IS_WORD(ch)\
172 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000173
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000174static unsigned int sre_lower(unsigned int ch)
175{
176 return ((ch) < 128 ? sre_char_lower[ch] : ch);
177}
178
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000179/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000180
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000181#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
182#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
183#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
184#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
185#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
186
/* Lowercase a character code with the C library's tolower(), which
   follows the current locale.  Only codes below 256 are passed to
   tolower(); anything larger is returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int) tolower((int) ch);
}
191
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000192/* unicode-specific character predicates */
193
194#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000195
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000196#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
197#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
198#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
Fredrik Lundh22d25462000-07-01 17:50:59 +0000199#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000200#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000201
202static unsigned int sre_lower_unicode(unsigned int ch)
203{
204 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
205}
206
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000207#endif
208
Guido van Rossumb700df92000-03-31 14:59:30 +0000209LOCAL(int)
210sre_category(SRE_CODE category, unsigned int ch)
211{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000212 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000213
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000214 case SRE_CATEGORY_DIGIT:
215 return SRE_IS_DIGIT(ch);
216 case SRE_CATEGORY_NOT_DIGIT:
217 return !SRE_IS_DIGIT(ch);
218 case SRE_CATEGORY_SPACE:
219 return SRE_IS_SPACE(ch);
220 case SRE_CATEGORY_NOT_SPACE:
221 return !SRE_IS_SPACE(ch);
222 case SRE_CATEGORY_WORD:
223 return SRE_IS_WORD(ch);
224 case SRE_CATEGORY_NOT_WORD:
225 return !SRE_IS_WORD(ch);
226 case SRE_CATEGORY_LINEBREAK:
227 return SRE_IS_LINEBREAK(ch);
228 case SRE_CATEGORY_NOT_LINEBREAK:
229 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000230
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000231 case SRE_CATEGORY_LOC_WORD:
232 return SRE_LOC_IS_WORD(ch);
233 case SRE_CATEGORY_LOC_NOT_WORD:
234 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000235
236#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000237 case SRE_CATEGORY_UNI_DIGIT:
238 return SRE_UNI_IS_DIGIT(ch);
239 case SRE_CATEGORY_UNI_NOT_DIGIT:
240 return !SRE_UNI_IS_DIGIT(ch);
241 case SRE_CATEGORY_UNI_SPACE:
242 return SRE_UNI_IS_SPACE(ch);
243 case SRE_CATEGORY_UNI_NOT_SPACE:
244 return !SRE_UNI_IS_SPACE(ch);
245 case SRE_CATEGORY_UNI_WORD:
246 return SRE_UNI_IS_WORD(ch);
247 case SRE_CATEGORY_UNI_NOT_WORD:
248 return !SRE_UNI_IS_WORD(ch);
249 case SRE_CATEGORY_UNI_LINEBREAK:
250 return SRE_UNI_IS_LINEBREAK(ch);
251 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
252 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000253#else
254 case SRE_CATEGORY_UNI_DIGIT:
255 return SRE_IS_DIGIT(ch);
256 case SRE_CATEGORY_UNI_NOT_DIGIT:
257 return !SRE_IS_DIGIT(ch);
258 case SRE_CATEGORY_UNI_SPACE:
259 return SRE_IS_SPACE(ch);
260 case SRE_CATEGORY_UNI_NOT_SPACE:
261 return !SRE_IS_SPACE(ch);
262 case SRE_CATEGORY_UNI_WORD:
263 return SRE_LOC_IS_WORD(ch);
264 case SRE_CATEGORY_UNI_NOT_WORD:
265 return !SRE_LOC_IS_WORD(ch);
266 case SRE_CATEGORY_UNI_LINEBREAK:
267 return SRE_IS_LINEBREAK(ch);
268 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
269 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000270#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000271 }
272 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000273}
274
275/* helpers */
276
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000277static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278mark_fini(SRE_STATE* state)
279{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000280 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000281 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000282 state->mark_stack = NULL;
283 }
284 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000285}
286
287static int
288mark_save(SRE_STATE* state, int lo, int hi)
289{
290 void* stack;
291 int size;
292 int minsize, newsize;
293
294 if (hi <= lo)
295 return 0;
296
297 size = (hi - lo) + 1;
298
299 newsize = state->mark_stack_size;
300 minsize = state->mark_stack_base + size;
301
302 if (newsize < minsize) {
303 /* create new stack */
304 if (!newsize) {
305 newsize = 512;
306 if (newsize < minsize)
307 newsize = minsize;
308 TRACE(("allocate stack %d\n", newsize));
309 stack = malloc(sizeof(void*) * newsize);
310 } else {
311 /* grow the stack */
312 while (newsize < minsize)
313 newsize += newsize;
314 TRACE(("grow stack to %d\n", newsize));
315 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
316 }
317 if (!stack) {
318 mark_fini(state);
319 return SRE_ERROR_MEMORY;
320 }
321 state->mark_stack = stack;
322 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000323 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000324
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000325 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000326
327 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
328 size * sizeof(void*));
329
330 state->mark_stack_base += size;
331
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000332 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000333}
334
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000335static int
336mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000337{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000338 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000339
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000340 if (hi <= lo)
341 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000342
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000343 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000344
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000345 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000346
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000347 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000348
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000349 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
350 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000352 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000353}
354
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000355/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000356
357#define SRE_CHAR unsigned char
358#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000359#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000360#define SRE_CHARSET sre_charset
361#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000362#define SRE_MATCH sre_match
363#define SRE_SEARCH sre_search
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000364#define SRE_LITERAL_TEMPLATE sre_literal_template
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000365
366#if defined(HAVE_UNICODE)
367
Guido van Rossumb700df92000-03-31 14:59:30 +0000368#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000369#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000370#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000371
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000372#undef SRE_LITERAL_TEMPLATE
Guido van Rossumb700df92000-03-31 14:59:30 +0000373#undef SRE_SEARCH
374#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000375#undef SRE_INFO
376#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000377#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000378#undef SRE_AT
379#undef SRE_CHAR
380
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000381/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000382
383#define SRE_CHAR Py_UNICODE
384#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000385#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000386#define SRE_CHARSET sre_ucharset
387#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000388#define SRE_MATCH sre_umatch
389#define SRE_SEARCH sre_usearch
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000390#define SRE_LITERAL_TEMPLATE sre_uliteral_template
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000391#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
393#endif /* SRE_RECURSIVE */
394
395/* -------------------------------------------------------------------- */
396/* String matching engine */
397
398/* the following section is compiled twice, with different character
399 settings */
400
401LOCAL(int)
402SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
403{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000405
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000406 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000407
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000408 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000411 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000412 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000413
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000414 case SRE_AT_BEGINNING_LINE:
415 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000416 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000417
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000418 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000419 return (((void*) (ptr+1) == state->end &&
420 SRE_IS_LINEBREAK((int) ptr[0])) ||
421 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000423 case SRE_AT_END_LINE:
424 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000425 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000426
Fredrik Lundh770617b2001-01-14 15:06:11 +0000427 case SRE_AT_END_STRING:
428 return ((void*) ptr == state->end);
429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 case SRE_AT_BOUNDARY:
431 if (state->beginning == state->end)
432 return 0;
433 that = ((void*) ptr > state->beginning) ?
434 SRE_IS_WORD((int) ptr[-1]) : 0;
435 this = ((void*) ptr < state->end) ?
436 SRE_IS_WORD((int) ptr[0]) : 0;
437 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000438
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000439 case SRE_AT_NON_BOUNDARY:
440 if (state->beginning == state->end)
441 return 0;
442 that = ((void*) ptr > state->beginning) ?
443 SRE_IS_WORD((int) ptr[-1]) : 0;
444 this = ((void*) ptr < state->end) ?
445 SRE_IS_WORD((int) ptr[0]) : 0;
446 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000447
448 case SRE_AT_LOC_BOUNDARY:
449 if (state->beginning == state->end)
450 return 0;
451 that = ((void*) ptr > state->beginning) ?
452 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
453 this = ((void*) ptr < state->end) ?
454 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
455 return this != that;
456
457 case SRE_AT_LOC_NON_BOUNDARY:
458 if (state->beginning == state->end)
459 return 0;
460 that = ((void*) ptr > state->beginning) ?
461 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
462 this = ((void*) ptr < state->end) ?
463 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
464 return this == that;
465
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000466#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000467 case SRE_AT_UNI_BOUNDARY:
468 if (state->beginning == state->end)
469 return 0;
470 that = ((void*) ptr > state->beginning) ?
471 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
472 this = ((void*) ptr < state->end) ?
473 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
474 return this != that;
475
476 case SRE_AT_UNI_NON_BOUNDARY:
477 if (state->beginning == state->end)
478 return 0;
479 that = ((void*) ptr > state->beginning) ?
480 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
481 this = ((void*) ptr < state->end) ?
482 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
483 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000484#endif
485
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000486 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000487
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000488 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000489}
490
491LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000492SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000493{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000494 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000495
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000496 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000497
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000498 for (;;) {
499 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000500
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000501 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000502 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000503 if (ch == set[0])
504 return ok;
505 set++;
506 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000507
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000508 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000509 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000510 if (set[0] <= ch && ch <= set[1])
511 return ok;
512 set += 2;
513 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000514
Fredrik Lundh3562f112000-07-02 12:00:07 +0000515 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000516 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000517 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
518 return ok;
519 set += 16;
520 break;
521
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000522 case SRE_OP_BIGCHARSET:
523 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
524 {
525 int count, block;
526 count = *(set++);
527 block = ((unsigned char*)set)[ch >> 8];
528 set += 128;
529 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
530 return ok;
531 set += count*16;
532 break;
533 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000534
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000535 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000536 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000537 if (sre_category(set[0], (int) ch))
538 return ok;
539 set += 1;
540 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000541
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000542 case SRE_OP_NEGATE:
543 ok = !ok;
544 break;
545
546 case SRE_OP_FAILURE:
547 return !ok;
548
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000549 default:
550 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000551 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000552 return 0;
553 }
554 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000555}
556
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000557LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
558
559LOCAL(int)
560SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
561{
562 SRE_CODE chr;
563 SRE_CHAR* ptr = state->ptr;
564 SRE_CHAR* end = state->end;
565 int i;
566
567 /* adjust end */
568 if (maxcount < end - ptr && maxcount != 65535)
569 end = ptr + maxcount;
570
571 switch (pattern[0]) {
572
573 case SRE_OP_ANY:
574 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000575 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000576 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
577 ptr++;
578 break;
579
580 case SRE_OP_ANY_ALL:
581 /* repeated dot wildcare. skip to the end of the target
582 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000583 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000584 ptr = end;
585 break;
586
587 case SRE_OP_LITERAL:
588 /* repeated literal */
589 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000590 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000591 while (ptr < end && (SRE_CODE) *ptr == chr)
592 ptr++;
593 break;
594
595 case SRE_OP_LITERAL_IGNORE:
596 /* repeated literal */
597 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000598 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000599 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
600 ptr++;
601 break;
602
603 case SRE_OP_NOT_LITERAL:
604 /* repeated non-literal */
605 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000606 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000607 while (ptr < end && (SRE_CODE) *ptr != chr)
608 ptr++;
609 break;
610
611 case SRE_OP_NOT_LITERAL_IGNORE:
612 /* repeated non-literal */
613 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000614 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000615 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
616 ptr++;
617 break;
618
619 case SRE_OP_IN:
620 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000621 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
622 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000623 ptr++;
624 break;
625
626 default:
627 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000628 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000629 while ((SRE_CHAR*) state->ptr < end) {
630 i = SRE_MATCH(state, pattern, level);
631 if (i < 0)
632 return i;
633 if (!i)
634 break;
635 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000636 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
637 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000638 return (SRE_CHAR*) state->ptr - ptr;
639 }
640
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000641 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000642 return ptr - (SRE_CHAR*) state->ptr;
643}
644
Fredrik Lundh33accc12000-08-27 20:59:47 +0000645#if 0 /* not used in this release */
Guido van Rossumb700df92000-03-31 14:59:30 +0000646LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000647SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
648{
649 /* check if an SRE_OP_INFO block matches at the current position.
650 returns the number of SRE_CODE objects to skip if successful, 0
651 if no match */
652
653 SRE_CHAR* end = state->end;
654 SRE_CHAR* ptr = state->ptr;
655 int i;
656
657 /* check minimal length */
658 if (pattern[3] && (end - ptr) < pattern[3])
659 return 0;
660
661 /* check known prefix */
662 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
663 /* <length> <skip> <prefix data> <overlap data> */
664 for (i = 0; i < pattern[5]; i++)
665 if ((SRE_CODE) ptr[i] != pattern[7 + i])
666 return 0;
667 return pattern[0] + 2 * pattern[6];
668 }
669 return pattern[0];
670}
Fredrik Lundh33accc12000-08-27 20:59:47 +0000671#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000672
673LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000674SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000675{
 /* interpreter loop for compiled pattern code: executes the opcodes
    starting at <pattern> against the text between the current
    position (state->ptr) and state->end.  <level> is the recursion
    depth; every nested SRE_MATCH call below passes level + 1.  when
    the SUCCESS opcode is reached, state->ptr is set to the position
    just past the matched text. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000676 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000677 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000678
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000679 SRE_CHAR* end = state->end;
680 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000681 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000682 SRE_REPEAT* rp;
683 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000684 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000685
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000686 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000687
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000688 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000689
 /* the stack probe is only made on every 10th recursion level */
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000690#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000691 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000692 return SRE_ERROR_RECURSION_LIMIT;
693#endif
694
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000695#if defined(USE_RECURSION_LIMIT)
696 if (level > USE_RECURSION_LIMIT)
697 return SRE_ERROR_RECURSION_LIMIT;
698#endif
699
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000700 if (pattern[0] == SRE_OP_INFO) {
701 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000702 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000703 if (pattern[3] && (end - ptr) < pattern[3]) {
704 TRACE(("reject (got %d chars, need %d)\n",
705 (end - ptr), pattern[3]));
706 return 0;
707 }
 /* step over the info block; pattern[1] holds its skip count */
708 pattern += pattern[1] + 1;
709 }
710
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000711 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000712
 /* fetch the opcode and advance, so that inside each case
    pattern[0] is the first operand of the current opcode */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000713 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000714
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000715 case SRE_OP_FAILURE:
716 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000717 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000718 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000719
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000720 case SRE_OP_SUCCESS:
721 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000722 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000723 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000724 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000725
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000726 case SRE_OP_AT:
727 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000728 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000729 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000730 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000731 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 pattern++;
733 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000734
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000735 case SRE_OP_CATEGORY:
736 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000737 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000738 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000740 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000741 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000742 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000744
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 case SRE_OP_LITERAL:
746 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000747 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000748 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000749 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000750 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000751 pattern++;
752 ptr++;
753 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 case SRE_OP_NOT_LITERAL:
756 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000757 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000758 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000760 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000761 pattern++;
762 ptr++;
763 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000764
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000765 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000766 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000767 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000768 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000769 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
770 return 0;
771 ptr++;
772 break;
773
774 case SRE_OP_ANY_ALL:
775 /* match anything */
776 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000777 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000778 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000779 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 ptr++;
781 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000782
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000783 case SRE_OP_IN:
784 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000785 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000786 TRACE(("|%p|%p|IN\n", pattern, ptr));
787 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000788 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000789 pattern += pattern[0];
790 ptr++;
791 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000792
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000793 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000794 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000795 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000796 i = pattern[0];
797 {
 /* the text of group i lies between mark[2*i] and mark[2*i+1];
    the reference fails if either mark is unset or they are
    out of order */
798 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
799 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
800 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000801 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000802 while (p < e) {
803 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000804 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000805 p++; ptr++;
806 }
807 }
808 pattern++;
809 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000810
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000811 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000812 /* match backreference */
 /* same as GROUPREF, but both sides are passed through
    state->lower() before they are compared */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000813 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000814 i = pattern[0];
815 {
816 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
817 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
818 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000819 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000820 while (p < e) {
821 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000822 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000823 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000824 p++; ptr++;
825 }
826 }
827 pattern++;
828 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000829
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000830 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000831 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000832 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000833 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000834 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000835 pattern++;
836 ptr++;
837 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000838
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000839 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000840 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000841 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000842 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000843 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000844 pattern++;
845 ptr++;
846 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000847
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000848 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000849 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000850 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000851 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000852 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000853 pattern += pattern[0];
854 ptr++;
855 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000857 case SRE_OP_MARK:
858 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000859 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000860 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000861 i = pattern[0];
 /* an odd mark index is the end slot of a group (GROUPREF
    reads mark[2*g] as start and mark[2*g+1] as end), so it
    also updates lastindex */
862 if (i & 1)
863 state->lastindex = i/2 + 1;
 /* lastmark tracks the highest mark index written so far */
864 if (i > state->lastmark)
865 state->lastmark = i;
866 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000867 pattern++;
868 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000869
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000870 case SRE_OP_JUMP:
871 case SRE_OP_INFO:
872 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000873 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000874 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000875 pattern += pattern[0];
876 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000877
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000878 case SRE_OP_ASSERT:
879 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000880 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000881 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
 /* run the subpattern from <back> characters before the
    current position; fail if that falls before
    state->beginning.  the local ptr is left untouched, so
    the assertion itself consumes nothing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000882 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000883 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000884 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000885 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000886 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000887 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000888 pattern += pattern[0];
889 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000890
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000891 case SRE_OP_ASSERT_NOT:
892 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000893 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000894 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000895 state->ptr = ptr - pattern[1];
 /* when there is no room to step back, the subpattern is
    not tried at all and the negative assertion holds */
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000896 if (state->ptr >= state->beginning) {
897 i = SRE_MATCH(state, pattern + 2, level + 1);
898 if (i < 0)
899 return i;
900 if (i)
901 return 0;
902 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000903 pattern += pattern[0];
904 break;
905
906 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000907 /* alternation */
908 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000909 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000910 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000911 for (; pattern[0]; pattern += pattern[0]) {
 /* quick reject: skip an alternative whose leading
    LITERAL or IN cannot match the current character */
912 if (pattern[1] == SRE_OP_LITERAL &&
913 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
914 continue;
915 if (pattern[1] == SRE_OP_IN &&
916 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
917 continue;
918 state->ptr = ptr;
919 i = SRE_MATCH(state, pattern + 1, level + 1);
920 if (i)
921 return i;
 /* clear the marks above the saved lastmark that the
    failed alternative may have set */
922 if (state->lastmark > lastmark) {
923 memset(
924 state->mark + lastmark + 1, 0,
925 (state->lastmark - lastmark) * sizeof(void*)
926 );
927 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000928 }
929 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000930 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000931
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000932 case SRE_OP_REPEAT_ONE:
933 /* match repeated sequence (maximizing regexp) */
934
935 /* this operator only works if the repeated item is
936 exactly one character wide, and we're not already
937 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000938 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000939
940 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
941
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000942 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000943 pattern[1], pattern[2]));
944
Fredrik Lundhe1869832000-08-01 22:47:49 +0000945 if (ptr + pattern[1] > end)
946 return 0; /* cannot match */
947
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000948 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000949
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000950 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
951 if (count < 0)
952 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000953
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000954 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000955
956 /* when we arrive here, count contains the number of
957 matches, and ptr points to the tail of the target
958 string. check if the rest of the pattern matches,
959 and backtrack if not. */
960
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000961 if (count < (int) pattern[1])
962 return 0;
963
964 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
965 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000966 state->ptr = ptr;
967 return 1;
968
969 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
970 /* tail starts with a literal. skip positions where
971 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000972 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000973 for (;;) {
 /* give back repetitions until the character at ptr
    equals the tail's literal, or count drops below
    the minimum */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000974 while (count >= (int) pattern[1] &&
975 (ptr >= end || *ptr != chr)) {
976 ptr--;
977 count--;
978 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000979 if (count < (int) pattern[1])
980 break;
981 state->ptr = ptr;
982 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000983 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000984 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000985 ptr--;
986 count--;
987 }
988
989 } else {
990 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000991 lastmark = state->lastmark;
 /* try the tail after each repetition count, from the
    largest down to the minimum */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000992 while (count >= (int) pattern[1]) {
993 state->ptr = ptr;
994 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000995 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000996 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000997 ptr--;
998 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000999 if (state->lastmark > lastmark) {
1000 memset(
1001 state->mark + lastmark + 1, 0,
1002 (state->lastmark - lastmark) * sizeof(void*)
1003 );
1004 state->lastmark = lastmark;
1005 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001006 }
1007 }
1008 return 0;
1009
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001010 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001011 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001012 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001013 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001014 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001015 pattern[1], pattern[2]));
1016
 /* count starts at -1 because the UNTIL operators compute
    count = rp->count + 1 on entry */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001017 rep.count = -1;
1018 rep.pattern = pattern;
1019
1020 /* install new repeat context */
1021 rep.prev = state->repeat;
1022 state->repeat = &rep;
1023
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001024 state->ptr = ptr;
1025 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001026
 /* rep is a local of this call, so the previous context
    is put back before returning */
1027 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001028
1029 return i;
1030
1031 case SRE_OP_MAX_UNTIL:
1032 /* maximizing repeat */
1033 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1034
1035 /* FIXME: we probably need to deal with zero-width
1036 matches in here... */
1037
1038 rp = state->repeat;
1039 if (!rp)
1040 return SRE_ERROR_STATE;
1041
1042 state->ptr = ptr;
1043
1044 count = rp->count + 1;
1045
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001046 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001047
1048 if (count < rp->pattern[1]) {
1049 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001050 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001051 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001052 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001053 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001054 return i;
 /* the item failed: undo the count and position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001055 rp->count = count - 1;
1056 state->ptr = ptr;
1057 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001058 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001059
 /* a max value of 65535 is treated as "no upper bound" */
1060 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001061 /* we may have enough matches, but if we can
1062 match another item, do so */
1063 rp->count = count;
1064 lastmark = state->lastmark;
 /* save the marks so they can be put back if matching
    another item fails */
Fredrik Lundh33accc12000-08-27 20:59:47 +00001065 i = mark_save(state, 0, lastmark);
1066 if (i < 0)
1067 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001068 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001069 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001070 if (i)
1071 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001072 i = mark_restore(state, 0, lastmark);
Fredrik Lundh397a6542001-10-18 19:30:16 +00001073 state->lastmark = lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001074 if (i < 0)
1075 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001076 rp->count = count - 1;
1077 state->ptr = ptr;
1078 }
1079
1080 /* cannot match more repeated items here. make sure the
1081 tail matches */
 /* the tail is matched with the enclosing repeat context
    active; this one is reinstated if the tail fails */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001082 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001083 i = SRE_MATCH(state, pattern, level + 1);
1084 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001085 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001086 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001087 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001088 return 0;
1089
1090 case SRE_OP_MIN_UNTIL:
1091 /* minimizing repeat */
1092 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1093
1094 rp = state->repeat;
1095 if (!rp)
1096 return SRE_ERROR_STATE;
1097
1098 count = rp->count + 1;
1099
Fredrik Lundh770617b2001-01-14 15:06:11 +00001100 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1101 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001102
1103 state->ptr = ptr;
1104
1105 if (count < rp->pattern[1]) {
1106 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001107 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001108 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001109 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001110 if (i)
1111 return i;
1112 rp->count = count-1;
1113 state->ptr = ptr;
1114 return 0;
1115 }
1116
1117 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001118 state->repeat = rp->prev;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +00001119 /* FIXME: the following fix doesn't always work (#133283) */
Fredrik Lundhdf781e62001-07-02 19:54:28 +00001120 if (rp->pattern[2] == 65535) {
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001121 /* unbounded repeat */
 /* try the tail at successive positions, advancing ptr by
    one character after each failed attempt, until the
    tail matches, an error occurs, or ptr reaches end */
1122 for (;;) {
1123 i = SRE_MATCH(state, pattern, level + 1);
1124 if (i || ptr >= end)
1125 break;
1126 state->ptr = ++ptr;
1127 }
1128 } else
1129 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001130 if (i) {
1131 /* free(rp); */
1132 return i;
1133 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001134
Fredrik Lundh770617b2001-01-14 15:06:11 +00001135 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001136 state->repeat = rp;
1137
 /* the tail failed; give up if a bounded repeat has already
    reached its maximum, otherwise match one more item */
1138 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1139 return 0;
1140
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001141 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001142 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001143 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001144 if (i)
1145 return i;
1146 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001147 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001148 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001149
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001150 default:
 /* pattern was already advanced by the switch, so the
    offending opcode is pattern[-1] */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001151 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001152 return SRE_ERROR_ILLEGAL;
1153 }
1154 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001155
Sjoerd Mullender89dfe9e2001-08-30 14:37:07 +00001156 /* can't end up here */
Fredrik Lundh21009b92001-09-18 18:47:09 +00001157 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
Guido van Rossumb700df92000-03-31 14:59:30 +00001158}
1159
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001160LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001161SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1162{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001163 SRE_CHAR* ptr = state->start;
1164 SRE_CHAR* end = state->end;
1165 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001166 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001167 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001168 SRE_CODE* prefix = NULL;
1169 SRE_CODE* charset = NULL;
1170 SRE_CODE* overlap = NULL;
1171 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001172
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001173 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001174 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001175 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001176
1177 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001178
1179 if (pattern[3] > 0) {
1180 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001181 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001182 end -= pattern[3]-1;
1183 if (end <= ptr)
1184 end = ptr+1;
1185 }
1186
Fredrik Lundh3562f112000-07-02 12:00:07 +00001187 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001188 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001189 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001190 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001191 prefix_skip = pattern[6];
1192 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001193 overlap = prefix + prefix_len - 1;
1194 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001195 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001196 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001197 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001198
1199 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001200 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001201
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001202 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1203 TRACE(("charset = %p\n", charset));
1204
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001205#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001206 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001207 /* pattern starts with a known prefix. use the overlap
1208 table to skip forward as fast as we possibly can */
1209 int i = 0;
1210 end = state->end;
1211 while (ptr < end) {
1212 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001213 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001214 if (!i)
1215 break;
1216 else
1217 i = overlap[i];
1218 } else {
1219 if (++i == prefix_len) {
1220 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001221 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1222 state->start = ptr + 1 - prefix_len;
1223 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001224 if (flags & SRE_INFO_LITERAL)
1225 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001226 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001227 if (status != 0)
1228 return status;
1229 /* close but no cigar -- try again */
1230 i = overlap[i];
1231 }
1232 break;
1233 }
1234
1235 }
1236 ptr++;
1237 }
1238 return 0;
1239 }
1240#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001241
Fredrik Lundh3562f112000-07-02 12:00:07 +00001242 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001243 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001244 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001245 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001246 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001247 for (;;) {
1248 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1249 ptr++;
1250 if (ptr == end)
1251 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001252 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001253 state->start = ptr;
1254 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001255 if (flags & SRE_INFO_LITERAL)
1256 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001257 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001258 if (status != 0)
1259 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001260 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001261 } else if (charset) {
1262 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001263 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001264 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001265 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001266 ptr++;
1267 if (ptr == end)
1268 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001269 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001270 state->start = ptr;
1271 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001272 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001273 if (status != 0)
1274 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001275 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001276 }
1277 } else
1278 /* general case */
1279 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001280 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001281 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001282 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001283 if (status != 0)
1284 break;
1285 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001286
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001287 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001288}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001289
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001290LOCAL(int)
1291SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1292{
1293 /* check if given string is a literal template (i.e. no escapes) */
1294 while (len-- > 0)
1295 if (*ptr++ == '\\')
1296 return 0;
1297 return 1;
1298}
Guido van Rossumb700df92000-03-31 14:59:30 +00001299
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001300#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001301
1302/* -------------------------------------------------------------------- */
1303/* factories and destructors */
1304
1305/* see sre.h for object declarations */
1306
1307staticforward PyTypeObject Pattern_Type;
1308staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001309staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001310
1311static PyObject *
1312_compile(PyObject* self_, PyObject* args)
1313{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001314 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001315
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001316 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001317 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001318
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001319 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001320 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001321 PyObject* code;
1322 int groups = 0;
1323 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001324 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001325 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1326 &PyList_Type, &code, &groups,
1327 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001328 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001329
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001330 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001331
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001332 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001333 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001334 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001335
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001336 self->codesize = n;
1337
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001338 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001339 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001340 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001341 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001342
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001343 if (PyErr_Occurred()) {
1344 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001345 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001346 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001347
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001348 Py_INCREF(pattern);
1349 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001350
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001351 self->flags = flags;
1352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001353 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001354
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001355 Py_XINCREF(groupindex);
1356 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001357
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001358 Py_XINCREF(indexgroup);
1359 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001360
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001361 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001362}
1363
1364static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001365sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001366{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001367 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001368}
1369
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001370static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001371sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001372{
1373 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001374 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001375 return NULL;
1376 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001377 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001378 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001379#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001380 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001381#else
1382 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001383#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001384 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001385}
1386
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001387LOCAL(void)
1388state_reset(SRE_STATE* state)
1389{
1390 int i;
1391
1392 state->lastmark = 0;
1393
1394 /* FIXME: dynamic! */
1395 for (i = 0; i < SRE_MARK_SIZE; i++)
1396 state->mark[i] = NULL;
1397
1398 state->lastindex = -1;
1399
1400 state->repeat = NULL;
1401
1402 mark_fini(state);
1403}
1404
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001405static void*
1406getstring(PyObject* string, int* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001407{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001408 /* given a python object, return a data pointer, a length (in
1409 characters), and a character size. return NULL if the object
1410 is not a string (or not compatible) */
1411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001412 PyBufferProcs *buffer;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001413 int size, bytes, charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001414 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001415
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001416#if defined(HAVE_UNICODE)
1417 if (PyUnicode_Check(string)) {
1418 /* unicode strings doesn't always support the buffer interface */
1419 ptr = (void*) PyUnicode_AS_DATA(string);
1420 bytes = PyUnicode_GET_DATA_SIZE(string);
1421 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001422 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001423
1424 } else {
1425#endif
1426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001427 /* get pointer to string buffer */
1428 buffer = string->ob_type->tp_as_buffer;
1429 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1430 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001431 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001432 return NULL;
1433 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001434
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001435 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001436 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1437 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001438 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1439 return NULL;
1440 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001441
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001442 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001443#if PY_VERSION_HEX >= 0x01060000
1444 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001445#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001446 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001447#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001448
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001449 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001450 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001451#if defined(HAVE_UNICODE)
1452 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001453 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001454#endif
1455 else {
1456 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1457 return NULL;
1458 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001459
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001460#if defined(HAVE_UNICODE)
1461 }
1462#endif
1463
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001464 *p_length = size;
1465 *p_charsize = charsize;
1466
1467 return ptr;
1468}
1469
1470LOCAL(PyObject*)
1471state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1472 int start, int end)
1473{
1474 /* prepare state object */
1475
1476 int length;
1477 int charsize;
1478 void* ptr;
1479
1480 memset(state, 0, sizeof(SRE_STATE));
1481
1482 state->lastindex = -1;
1483
1484 ptr = getstring(string, &length, &charsize);
1485 if (!ptr)
1486 return NULL;
1487
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001488 /* adjust boundaries */
1489 if (start < 0)
1490 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001491 else if (start > length)
1492 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001493
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001494 if (end < 0)
1495 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001496 else if (end > length)
1497 end = length;
1498
1499 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001500
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001501 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001502
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001503 state->start = (void*) ((char*) ptr + start * state->charsize);
1504 state->end = (void*) ((char*) ptr + end * state->charsize);
1505
1506 Py_INCREF(string);
1507 state->string = string;
1508 state->pos = start;
1509 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001510
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001511 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001512 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001513 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001514#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001515 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001516#else
1517 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001518#endif
1519 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001520 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001521
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001522 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001523}
1524
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001525LOCAL(void)
1526state_fini(SRE_STATE* state)
1527{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001528 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001529 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001530}
1531
/* convert a pointer into the target string ("member") to a character
   index, counted from the beginning of the string: the byte distance
   is divided by the character size */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1535
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001536LOCAL(PyObject*)
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001537state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001538{
Fredrik Lundh58100642000-08-09 09:14:35 +00001539 int i, j;
1540
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001541 index = (index - 1) * 2;
1542
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001543 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001544 if (empty)
1545 /* want empty string */
1546 i = j = 0;
1547 else {
1548 Py_INCREF(Py_None);
1549 return Py_None;
1550 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001551 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001552 i = STATE_OFFSET(state, state->mark[index]);
1553 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001554 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001555
Fredrik Lundh58100642000-08-09 09:14:35 +00001556 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001557}
1558
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001559static void
1560pattern_error(int status)
1561{
1562 switch (status) {
1563 case SRE_ERROR_RECURSION_LIMIT:
1564 PyErr_SetString(
1565 PyExc_RuntimeError,
1566 "maximum recursion limit exceeded"
1567 );
1568 break;
1569 case SRE_ERROR_MEMORY:
1570 PyErr_NoMemory();
1571 break;
1572 default:
1573 /* other error codes indicate compiler/engine bugs */
1574 PyErr_SetString(
1575 PyExc_RuntimeError,
1576 "internal error in regular expression engine"
1577 );
1578 }
1579}
1580
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001581static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001582pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001583{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001584 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001585
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001586 MatchObject* match;
1587 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001588 char* base;
1589 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001590
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001591 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001592
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001593 /* create match object (with room for extra group marks) */
1594 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001595 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001596 if (!match)
1597 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001598
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001599 Py_INCREF(pattern);
1600 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001602 Py_INCREF(state->string);
1603 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001605 match->regs = NULL;
1606 match->groups = pattern->groups+1;
1607
1608 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001609
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001610 base = (char*) state->beginning;
1611 n = state->charsize;
1612
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001613 match->mark[0] = ((char*) state->start - base) / n;
1614 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001615
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001616 for (i = j = 0; i < pattern->groups; i++, j+=2)
1617 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1618 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1619 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1620 } else
1621 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1622
1623 match->pos = state->pos;
1624 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001625
Fredrik Lundh6f013982000-07-03 18:44:21 +00001626 match->lastindex = state->lastindex;
1627
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001628 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001629
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001630 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001631
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001632 /* no match */
1633 Py_INCREF(Py_None);
1634 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001635
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001636 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001637
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001638 /* internal error */
1639 pattern_error(status);
1640 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001641}
1642
1643static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001644pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001645{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001646 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001648 ScannerObject* self;
1649
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001650 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001651 int start = 0;
1652 int end = INT_MAX;
1653 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1654 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001655
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001656 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001657 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001658 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001659 return NULL;
1660
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001661 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001662 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001663 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001664 return NULL;
1665 }
1666
1667 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001668 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001669
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001670 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001671}
1672
Guido van Rossumb700df92000-03-31 14:59:30 +00001673static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001674pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001675{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001676 Py_XDECREF(self->pattern);
1677 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001678 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001679 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001680}
1681
1682static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001683pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001684{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001685 SRE_STATE state;
1686 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001687
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001688 PyObject* string;
1689 int start = 0;
1690 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001691 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1692 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1693 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001694 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001695
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001696 string = state_init(&state, self, string, start, end);
1697 if (!string)
1698 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001699
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001700 state.ptr = state.start;
1701
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001702 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1703
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001704 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001705 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001706 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001707#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001708 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001709#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001710 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001711
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001712 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1713
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001714 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001715
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001716 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001717}
1718
1719static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001720pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001721{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001722 SRE_STATE state;
1723 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001724
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001725 PyObject* string;
1726 int start = 0;
1727 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001728 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1729 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1730 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001731 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001732
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001733 string = state_init(&state, self, string, start, end);
1734 if (!string)
1735 return NULL;
1736
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001737 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1738
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001739 if (state.charsize == 1) {
1740 status = sre_search(&state, PatternObject_GetCode(self));
1741 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001742#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001743 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001744#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001745 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001746
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001747 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001749 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001750
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001751 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001752}
1753
1754static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001755call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001756{
1757 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001758 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001759 PyObject* func;
1760 PyObject* result;
1761
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001762 if (!args)
1763 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001764 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001765 if (!name)
1766 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001767 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001768 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001769 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001770 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001771 func = PyObject_GetAttrString(mod, function);
1772 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001773 if (!func)
1774 return NULL;
1775 result = PyObject_CallObject(func, args);
1776 Py_DECREF(func);
1777 Py_DECREF(args);
1778 return result;
1779}
1780
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object with the result of copy.deepcopy(*object, memo).
       returns 1 on success.  on failure, 0 is returned and *object is
       left as it was */

    PyObject* original = *object;
    PyObject* copy = call(
        "copy", "deepcopy",
        Py_BuildValue("OO", original, memo)
        );

    if (copy) {
        /* the copy takes the place of the original */
        Py_DECREF(original);
        *object = copy;
        return 1;
    }

    return 0;
}
#endif
1800
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001801static PyObject*
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001802join(PyObject* list, PyObject* pattern)
1803{
1804 /* join list elements */
1805
1806 PyObject* joiner;
1807#if PY_VERSION_HEX >= 0x01060000
1808 PyObject* function;
1809 PyObject* args;
1810#endif
1811 PyObject* result;
1812
1813 switch (PyList_GET_SIZE(list)) {
1814 case 0:
1815 Py_DECREF(list);
1816 return PyString_FromString("");
1817 case 1:
1818 result = PyList_GET_ITEM(list, 0);
1819 Py_INCREF(result);
1820 Py_DECREF(list);
1821 return result;
1822 }
1823
1824 /* two or more elements: slice out a suitable separator from the
1825 first member, and use that to join the entire list */
1826
1827 joiner = PySequence_GetSlice(pattern, 0, 0);
1828 if (!joiner)
1829 return NULL;
1830
1831#if PY_VERSION_HEX >= 0x01060000
1832 function = PyObject_GetAttrString(joiner, "join");
1833 if (!function) {
1834 Py_DECREF(joiner);
1835 return NULL;
1836 }
1837 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001838 if (!args) {
1839 Py_DECREF(function);
1840 Py_DECREF(joiner);
1841 return NULL;
1842 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001843 PyTuple_SET_ITEM(args, 0, list);
1844 result = PyObject_CallObject(function, args);
1845 Py_DECREF(args); /* also removes list */
1846 Py_DECREF(function);
1847#else
1848 result = call(
1849 "string", "join",
1850 Py_BuildValue("OO", list, joiner)
1851 );
1852#endif
1853 Py_DECREF(joiner);
1854
1855 return result;
1856}
1857
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001858static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001859pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001860{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001861 SRE_STATE state;
1862 PyObject* list;
1863 int status;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001864 int i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00001865
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001866 PyObject* string;
1867 int start = 0;
1868 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001869 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1870 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1871 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001873
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001874 string = state_init(&state, self, string, start, end);
1875 if (!string)
1876 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001877
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001878 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001879 if (!list) {
1880 state_fini(&state);
1881 return NULL;
1882 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001883
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001884 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001885
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001886 PyObject* item;
1887
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001888 state_reset(&state);
1889
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001890 state.ptr = state.start;
1891
1892 if (state.charsize == 1) {
1893 status = sre_search(&state, PatternObject_GetCode(self));
1894 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001895#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001896 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001897#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001898 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001899
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001900 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001901 if (status == 0)
1902 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001903 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001904 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001905 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001906
1907 /* don't bother to build a match object */
1908 switch (self->groups) {
1909 case 0:
1910 b = STATE_OFFSET(&state, state.start);
1911 e = STATE_OFFSET(&state, state.ptr);
1912 item = PySequence_GetSlice(string, b, e);
1913 if (!item)
1914 goto error;
1915 break;
1916 case 1:
1917 item = state_getslice(&state, 1, string, 1);
1918 if (!item)
1919 goto error;
1920 break;
1921 default:
1922 item = PyTuple_New(self->groups);
1923 if (!item)
1924 goto error;
1925 for (i = 0; i < self->groups; i++) {
1926 PyObject* o = state_getslice(&state, i+1, string, 1);
1927 if (!o) {
1928 Py_DECREF(item);
1929 goto error;
1930 }
1931 PyTuple_SET_ITEM(item, i, o);
1932 }
1933 break;
1934 }
1935
1936 status = PyList_Append(list, item);
1937 Py_DECREF(item);
1938 if (status < 0)
1939 goto error;
1940
1941 if (state.ptr == state.start)
1942 state.start = (void*) ((char*) state.ptr + state.charsize);
1943 else
1944 state.start = state.ptr;
1945
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001946 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001947
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001948 state_fini(&state);
1949 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001950
1951error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001952 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001953 state_fini(&state);
1954 return NULL;
1955
Guido van Rossumb700df92000-03-31 14:59:30 +00001956}
1957
#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    /* return an iterator that calls the "search" method of a new
       scanner object until it returns None */

    PyObject* scanner;
    PyObject* method;
    PyObject* result = NULL;

    scanner = pattern_scanner(pattern, args);
    if (!scanner)
        return NULL;

    /* our own reference to the scanner is released once the method
       lookup is done */
    method = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);

    if (method) {
        result = PyCallIter_New(method, Py_None);
        Py_DECREF(method);
    }

    return result;
}
#endif
1981
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001982static PyObject*
1983pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
1984{
1985 SRE_STATE state;
1986 PyObject* list;
1987 PyObject* item;
1988 int status;
1989 int n;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001990 int i;
1991 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001992
1993 PyObject* string;
1994 int maxsplit = 0;
1995 static char* kwlist[] = { "source", "maxsplit", NULL };
1996 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
1997 &string, &maxsplit))
1998 return NULL;
1999
2000 string = state_init(&state, self, string, 0, INT_MAX);
2001 if (!string)
2002 return NULL;
2003
2004 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002005 if (!list) {
2006 state_fini(&state);
2007 return NULL;
2008 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002009
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002010 n = 0;
2011 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002012
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002013 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002014
2015 state_reset(&state);
2016
2017 state.ptr = state.start;
2018
2019 if (state.charsize == 1) {
2020 status = sre_search(&state, PatternObject_GetCode(self));
2021 } else {
2022#if defined(HAVE_UNICODE)
2023 status = sre_usearch(&state, PatternObject_GetCode(self));
2024#endif
2025 }
2026
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002027 if (status <= 0) {
2028 if (status == 0)
2029 break;
2030 pattern_error(status);
2031 goto error;
2032 }
2033
2034 if (state.start == state.ptr) {
2035 if (last == state.end)
2036 break;
2037 /* skip one character */
2038 state.start = (void*) ((char*) state.ptr + state.charsize);
2039 continue;
2040 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002041
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002042 /* get segment before this match */
2043 item = PySequence_GetSlice(
2044 string, STATE_OFFSET(&state, last),
2045 STATE_OFFSET(&state, state.start)
2046 );
2047 if (!item)
2048 goto error;
2049 status = PyList_Append(list, item);
2050 Py_DECREF(item);
2051 if (status < 0)
2052 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002053
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002054 /* add groups (if any) */
2055 for (i = 0; i < self->groups; i++) {
2056 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002057 if (!item)
2058 goto error;
2059 status = PyList_Append(list, item);
2060 Py_DECREF(item);
2061 if (status < 0)
2062 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002063 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002064
2065 n = n + 1;
2066
2067 last = state.start = state.ptr;
2068
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002069 }
2070
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002071 /* get segment following last match (even if empty) */
2072 item = PySequence_GetSlice(
2073 string, STATE_OFFSET(&state, last), state.endpos
2074 );
2075 if (!item)
2076 goto error;
2077 status = PyList_Append(list, item);
2078 Py_DECREF(item);
2079 if (status < 0)
2080 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002081
2082 state_fini(&state);
2083 return list;
2084
2085error:
2086 Py_DECREF(list);
2087 state_fini(&state);
2088 return NULL;
2089
2090}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002091
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002092static PyObject*
2093pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2094 int count, int subn)
2095{
2096 SRE_STATE state;
2097 PyObject* list;
2098 PyObject* item;
2099 PyObject* filter;
2100 PyObject* args;
2101 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002102 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002103 int status;
2104 int n;
2105 int i, b, e;
2106 int filter_is_callable;
2107
Fredrik Lundhdac58492001-10-21 21:48:30 +00002108 if (PyCallable_Check(template)) {
2109 /* sub/subn takes either a function or a template */
2110 filter = template;
2111 Py_INCREF(filter);
2112 filter_is_callable = 1;
2113 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002114 /* if not callable, check if it's a literal string */
2115 int literal;
2116 ptr = getstring(template, &n, &b);
2117 if (ptr) {
2118 if (b == 1) {
2119 literal = sre_literal_template(ptr, n);
2120 } else {
2121#if defined(HAVE_UNICODE)
2122 literal = sre_uliteral_template(ptr, n);
2123#endif
2124 }
2125 } else {
2126 PyErr_Clear();
2127 literal = 0;
2128 }
2129 if (literal) {
2130 filter = template;
2131 Py_INCREF(filter);
2132 filter_is_callable = 0;
2133 } else {
2134 /* not a literal; hand it over to the template compiler */
2135 filter = call(
2136 SRE_MODULE, "_subx",
2137 Py_BuildValue("OO", self, template)
2138 );
2139 if (!filter)
2140 return NULL;
2141 filter_is_callable = PyCallable_Check(filter);
2142 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002143 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002144
2145 string = state_init(&state, self, string, 0, INT_MAX);
2146 if (!string)
2147 return NULL;
2148
2149 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002150 if (!list) {
2151 state_fini(&state);
2152 return NULL;
2153 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002154
2155 n = i = 0;
2156
2157 while (!count || n < count) {
2158
2159 state_reset(&state);
2160
2161 state.ptr = state.start;
2162
2163 if (state.charsize == 1) {
2164 status = sre_search(&state, PatternObject_GetCode(self));
2165 } else {
2166#if defined(HAVE_UNICODE)
2167 status = sre_usearch(&state, PatternObject_GetCode(self));
2168#endif
2169 }
2170
2171 if (status <= 0) {
2172 if (status == 0)
2173 break;
2174 pattern_error(status);
2175 goto error;
2176 }
2177
2178 b = STATE_OFFSET(&state, state.start);
2179 e = STATE_OFFSET(&state, state.ptr);
2180
2181 if (i < b) {
2182 /* get segment before this match */
2183 item = PySequence_GetSlice(string, i, b);
2184 if (!item)
2185 goto error;
2186 status = PyList_Append(list, item);
2187 Py_DECREF(item);
2188 if (status < 0)
2189 goto error;
2190
2191 } else if (i == b && i == e && n > 0)
2192 /* ignore empty match on latest position */
2193 goto next;
2194
2195 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002196 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002197 match = pattern_new_match(self, &state, 1);
2198 if (!match)
2199 goto error;
2200 args = Py_BuildValue("(O)", match);
2201 if (!args) {
2202 Py_DECREF(args);
2203 goto error;
2204 }
2205 item = PyObject_CallObject(filter, args);
2206 Py_DECREF(args);
2207 Py_DECREF(match);
2208 if (!item)
2209 goto error;
2210 } else {
2211 /* filter is literal string */
2212 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002213 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002214 }
2215
2216 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002217 if (item != Py_None) {
2218 status = PyList_Append(list, item);
2219 Py_DECREF(item);
2220 if (status < 0)
2221 goto error;
2222 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002223
2224 i = e;
2225 n = n + 1;
2226
2227next:
2228 /* move on */
2229 if (state.ptr == state.start)
2230 state.start = (void*) ((char*) state.ptr + state.charsize);
2231 else
2232 state.start = state.ptr;
2233
2234 }
2235
2236 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002237 if (i < state.endpos) {
2238 item = PySequence_GetSlice(string, i, state.endpos);
2239 if (!item)
2240 goto error;
2241 status = PyList_Append(list, item);
2242 Py_DECREF(item);
2243 if (status < 0)
2244 goto error;
2245 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002246
2247 state_fini(&state);
2248
Fredrik Lundhdac58492001-10-21 21:48:30 +00002249 /* convert list to single string (also removes list) */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002250 item = join(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002251
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002252 if (!item)
2253 return NULL;
2254
2255 if (subn)
2256 return Py_BuildValue("Ni", item, n);
2257
2258 return item;
2259
2260error:
2261 Py_DECREF(list);
2262 state_fini(&state);
2263 return NULL;
2264
2265}
2266
2267static PyObject*
2268pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2269{
2270 PyObject* template;
2271 PyObject* string;
2272 int count = 0;
2273 static char* kwlist[] = { "repl", "string", "count", NULL };
2274 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2275 &template, &string, &count))
2276 return NULL;
2277
2278 return pattern_subx(self, template, string, count, 0);
2279}
2280
2281static PyObject*
2282pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2283{
2284 PyObject* template;
2285 PyObject* string;
2286 int count = 0;
2287 static char* kwlist[] = { "repl", "string", "count", NULL };
2288 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2289 &template, &string, &count))
2290 return NULL;
2291
2292 return pattern_subx(self, template, string, count, 1);
2293}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002294
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002295static PyObject*
2296pattern_copy(PatternObject* self, PyObject* args)
2297{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002298#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002299 PatternObject* copy;
2300 int offset;
2301
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002302 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2303 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002304
2305 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2306 if (!copy)
2307 return NULL;
2308
2309 offset = offsetof(PatternObject, groups);
2310
2311 Py_XINCREF(self->groupindex);
2312 Py_XINCREF(self->indexgroup);
2313 Py_XINCREF(self->pattern);
2314
2315 memcpy((char*) copy + offset, (char*) self + offset,
2316 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2317
2318 return (PyObject*) copy;
2319#else
2320 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2321 return NULL;
2322#endif
2323}
2324
2325static PyObject*
2326pattern_deepcopy(PatternObject* self, PyObject* args)
2327{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002328#ifdef USE_BUILTIN_COPY
2329 PatternObject* copy;
2330
2331 PyObject* memo;
2332 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2333 return NULL;
2334
2335 copy = (PatternObject*) pattern_copy(self, Py_None);
2336 if (!copy)
2337 return NULL;
2338
2339 if (!deepcopy(&copy->groupindex, memo) ||
2340 !deepcopy(&copy->indexgroup, memo) ||
2341 !deepcopy(&copy->pattern, memo)) {
2342 Py_DECREF(copy);
2343 return NULL;
2344 }
2345
2346#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002347 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2348 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002349#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002350}
2351
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002352static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002353 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2354 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2355 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2356 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2357 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2358 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002359#if PY_VERSION_HEX >= 0x02020000
2360 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS},
2361#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002362 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002363 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2364 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002365 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002366};
2367
2368static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002369pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002370{
2371 PyObject* res;
2372
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002373 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002374
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002375 if (res)
2376 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002377
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002378 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002379
2380 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002381 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002382 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002383 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002384 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002385
2386 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002387 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002388
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002389 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002390 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002391
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002392 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002393 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002394 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002395 }
2396
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002397 PyErr_SetString(PyExc_AttributeError, name);
2398 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002399}
2400
2401statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002402 PyObject_HEAD_INIT(NULL)
2403 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002404 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002405 (destructor)pattern_dealloc, /*tp_dealloc*/
2406 0, /*tp_print*/
2407 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002408};
2409
2410/* -------------------------------------------------------------------- */
2411/* match methods */
2412
2413static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002414match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002415{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002416 Py_XDECREF(self->regs);
2417 Py_XDECREF(self->string);
2418 Py_DECREF(self->pattern);
2419 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002420}
2421
2422static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002423match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002424{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002425 if (index < 0 || index >= self->groups) {
2426 /* raise IndexError if we were given a bad group number */
2427 PyErr_SetString(
2428 PyExc_IndexError,
2429 "no such group"
2430 );
2431 return NULL;
2432 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002433
Fredrik Lundh6f013982000-07-03 18:44:21 +00002434 index *= 2;
2435
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002436 if (self->string == Py_None || self->mark[index] < 0) {
2437 /* return default value if the string or group is undefined */
2438 Py_INCREF(def);
2439 return def;
2440 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002441
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002442 return PySequence_GetSlice(
2443 self->string, self->mark[index], self->mark[index+1]
2444 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002445}
2446
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002447static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002448match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002449{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002450 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002451
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002452 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002453 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002454
Fredrik Lundh6f013982000-07-03 18:44:21 +00002455 i = -1;
2456
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002457 if (self->pattern->groupindex) {
2458 index = PyObject_GetItem(self->pattern->groupindex, index);
2459 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002460 if (PyInt_Check(index))
2461 i = (int) PyInt_AS_LONG(index);
2462 Py_DECREF(index);
2463 } else
2464 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002465 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002466
2467 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002468}
2469
2470static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002471match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002472{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002473 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002474}
2475
2476static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002477match_expand(MatchObject* self, PyObject* args)
2478{
2479 PyObject* template;
2480 if (!PyArg_ParseTuple(args, "O:expand", &template))
2481 return NULL;
2482
2483 /* delegate to Python code */
2484 return call(
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002485 SRE_MODULE, "_expand",
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002486 Py_BuildValue("OOO", self->pattern, self, template)
2487 );
2488}
2489
2490static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002491match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002492{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002493 PyObject* result;
2494 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002495
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002496 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002497
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002498 switch (size) {
2499 case 0:
2500 result = match_getslice(self, Py_False, Py_None);
2501 break;
2502 case 1:
2503 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2504 break;
2505 default:
2506 /* fetch multiple items */
2507 result = PyTuple_New(size);
2508 if (!result)
2509 return NULL;
2510 for (i = 0; i < size; i++) {
2511 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002512 self, PyTuple_GET_ITEM(args, i), Py_None
2513 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002514 if (!item) {
2515 Py_DECREF(result);
2516 return NULL;
2517 }
2518 PyTuple_SET_ITEM(result, i, item);
2519 }
2520 break;
2521 }
2522 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002523}
2524
2525static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002526match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002527{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002528 PyObject* result;
2529 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002530
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002531 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002532 static char* kwlist[] = { "default", NULL };
2533 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002534 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002535
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002536 result = PyTuple_New(self->groups-1);
2537 if (!result)
2538 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002539
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002540 for (index = 1; index < self->groups; index++) {
2541 PyObject* item;
2542 item = match_getslice_by_index(self, index, def);
2543 if (!item) {
2544 Py_DECREF(result);
2545 return NULL;
2546 }
2547 PyTuple_SET_ITEM(result, index-1, item);
2548 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002549
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002550 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002551}
2552
2553static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002554match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002555{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002556 PyObject* result;
2557 PyObject* keys;
2558 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002559
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002560 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002561 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002562 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002563 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002564
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002565 result = PyDict_New();
2566 if (!result || !self->pattern->groupindex)
2567 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002568
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002569 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002570 if (!keys)
2571 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002572
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002573 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002574 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002575 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002576 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002577 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002578 if (!key)
2579 goto failed;
2580 value = match_getslice(self, key, def);
2581 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002582 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002583 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002584 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002585 status = PyDict_SetItem(result, key, value);
2586 Py_DECREF(value);
2587 if (status < 0)
2588 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002589 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002590
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002591 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002592
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002593 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002594
2595failed:
2596 Py_DECREF(keys);
2597 Py_DECREF(result);
2598 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002599}
2600
2601static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002602match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002603{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002604 int index;
2605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002606 PyObject* index_ = Py_False; /* zero */
2607 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2608 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002609
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002610 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002611
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002612 if (index < 0 || index >= self->groups) {
2613 PyErr_SetString(
2614 PyExc_IndexError,
2615 "no such group"
2616 );
2617 return NULL;
2618 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002619
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002620 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002621 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002622}
2623
2624static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002625match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002626{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002627 int index;
2628
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002629 PyObject* index_ = Py_False; /* zero */
2630 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2631 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002632
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002633 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002634
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002635 if (index < 0 || index >= self->groups) {
2636 PyErr_SetString(
2637 PyExc_IndexError,
2638 "no such group"
2639 );
2640 return NULL;
2641 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002642
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002643 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002644 return Py_BuildValue("i", self->mark[index*2+1]);
2645}
2646
2647LOCAL(PyObject*)
2648_pair(int i1, int i2)
2649{
2650 PyObject* pair;
2651 PyObject* item;
2652
2653 pair = PyTuple_New(2);
2654 if (!pair)
2655 return NULL;
2656
2657 item = PyInt_FromLong(i1);
2658 if (!item)
2659 goto error;
2660 PyTuple_SET_ITEM(pair, 0, item);
2661
2662 item = PyInt_FromLong(i2);
2663 if (!item)
2664 goto error;
2665 PyTuple_SET_ITEM(pair, 1, item);
2666
2667 return pair;
2668
2669 error:
2670 Py_DECREF(pair);
2671 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002672}
2673
2674static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002675match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002676{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002677 int index;
2678
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002679 PyObject* index_ = Py_False; /* zero */
2680 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2681 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002682
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002683 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002684
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002685 if (index < 0 || index >= self->groups) {
2686 PyErr_SetString(
2687 PyExc_IndexError,
2688 "no such group"
2689 );
2690 return NULL;
2691 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002692
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002693 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002694 return _pair(self->mark[index*2], self->mark[index*2+1]);
2695}
2696
2697static PyObject*
2698match_regs(MatchObject* self)
2699{
2700 PyObject* regs;
2701 PyObject* item;
2702 int index;
2703
2704 regs = PyTuple_New(self->groups);
2705 if (!regs)
2706 return NULL;
2707
2708 for (index = 0; index < self->groups; index++) {
2709 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2710 if (!item) {
2711 Py_DECREF(regs);
2712 return NULL;
2713 }
2714 PyTuple_SET_ITEM(regs, index, item);
2715 }
2716
2717 Py_INCREF(regs);
2718 self->regs = regs;
2719
2720 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002721}
2722
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002723static PyObject*
2724match_copy(MatchObject* self, PyObject* args)
2725{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002726#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002727 MatchObject* copy;
2728 int slots, offset;
2729
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002730 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2731 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002732
2733 slots = 2 * (self->pattern->groups+1);
2734
2735 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2736 if (!copy)
2737 return NULL;
2738
2739 /* this value a constant, but any compiler should be able to
2740 figure that out all by itself */
2741 offset = offsetof(MatchObject, string);
2742
2743 Py_XINCREF(self->pattern);
2744 Py_XINCREF(self->string);
2745 Py_XINCREF(self->regs);
2746
2747 memcpy((char*) copy + offset, (char*) self + offset,
2748 sizeof(MatchObject) + slots * sizeof(int) - offset);
2749
2750 return (PyObject*) copy;
2751#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002752 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002753 return NULL;
2754#endif
2755}
2756
2757static PyObject*
2758match_deepcopy(MatchObject* self, PyObject* args)
2759{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002760#ifdef USE_BUILTIN_COPY
2761 MatchObject* copy;
2762
2763 PyObject* memo;
2764 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2765 return NULL;
2766
2767 copy = (MatchObject*) match_copy(self, Py_None);
2768 if (!copy)
2769 return NULL;
2770
2771 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2772 !deepcopy(&copy->string, memo) ||
2773 !deepcopy(&copy->regs, memo)) {
2774 Py_DECREF(copy);
2775 return NULL;
2776 }
2777
2778#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002779 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2780 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002781#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002782}
2783
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002784static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002785 {"group", (PyCFunction) match_group, METH_VARARGS},
2786 {"start", (PyCFunction) match_start, METH_VARARGS},
2787 {"end", (PyCFunction) match_end, METH_VARARGS},
2788 {"span", (PyCFunction) match_span, METH_VARARGS},
2789 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2790 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2791 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002792 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2793 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002794 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002795};
2796
2797static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002798match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002799{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002800 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002801
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002802 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2803 if (res)
2804 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002805
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002806 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002807
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002808 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002809 if (self->lastindex >= 0)
2810 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002811 Py_INCREF(Py_None);
2812 return Py_None;
2813 }
2814
2815 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002816 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002817 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002818 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002819 );
2820 if (result)
2821 return result;
2822 PyErr_Clear();
2823 }
2824 Py_INCREF(Py_None);
2825 return Py_None;
2826 }
2827
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002828 if (!strcmp(name, "string")) {
2829 if (self->string) {
2830 Py_INCREF(self->string);
2831 return self->string;
2832 } else {
2833 Py_INCREF(Py_None);
2834 return Py_None;
2835 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002836 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002837
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002838 if (!strcmp(name, "regs")) {
2839 if (self->regs) {
2840 Py_INCREF(self->regs);
2841 return self->regs;
2842 } else
2843 return match_regs(self);
2844 }
2845
2846 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002847 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002848 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002849 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002850
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002851 if (!strcmp(name, "pos"))
2852 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002853
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002854 if (!strcmp(name, "endpos"))
2855 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002857 PyErr_SetString(PyExc_AttributeError, name);
2858 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002859}
2860
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2863
2864statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002865 PyObject_HEAD_INIT(NULL)
2866 0, "SRE_Match",
2867 sizeof(MatchObject), sizeof(int),
2868 (destructor)match_dealloc, /*tp_dealloc*/
2869 0, /*tp_print*/
2870 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002871};
2872
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002873/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002874/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002875
2876static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002877scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002878{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002879 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002880 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002881 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002882}
2883
2884static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002885scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002886{
2887 SRE_STATE* state = &self->state;
2888 PyObject* match;
2889 int status;
2890
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002891 state_reset(state);
2892
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002893 state->ptr = state->start;
2894
2895 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002896 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002897 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002898#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002899 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002900#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002901 }
2902
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002903 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002904 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002905
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002906 if (status == 0 || state->ptr == state->start)
2907 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002908 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002909 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002910
2911 return match;
2912}
2913
2914
2915static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002916scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002917{
2918 SRE_STATE* state = &self->state;
2919 PyObject* match;
2920 int status;
2921
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002922 state_reset(state);
2923
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002924 state->ptr = state->start;
2925
2926 if (state->charsize == 1) {
2927 status = sre_search(state, PatternObject_GetCode(self->pattern));
2928 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002929#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002930 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002931#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002932 }
2933
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002934 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002935 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002936
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002937 if (status == 0 || state->ptr == state->start)
2938 state->start = (void*) ((char*) state->ptr + state->charsize);
2939 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002940 state->start = state->ptr;
2941
2942 return match;
2943}
2944
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002945static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002946 {"match", (PyCFunction) scanner_match, 0},
2947 {"search", (PyCFunction) scanner_search, 0},
2948 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002949};
2950
2951static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002952scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002953{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002954 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002955
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002956 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2957 if (res)
2958 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002959
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002960 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002961
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002962 /* attributes */
2963 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002964 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002965 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002966 }
2967
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002968 PyErr_SetString(PyExc_AttributeError, name);
2969 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002970}
2971
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002972statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002973 PyObject_HEAD_INIT(NULL)
2974 0, "SRE_Scanner",
2975 sizeof(ScannerObject), 0,
2976 (destructor)scanner_dealloc, /*tp_dealloc*/
2977 0, /*tp_print*/
2978 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002979};
2980
Guido van Rossumb700df92000-03-31 14:59:30 +00002981static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002982 {"compile", _compile, 1},
2983 {"getcodesize", sre_codesize, 1},
2984 {"getlower", sre_getlower, 1},
2985 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002986};
2987
Tim Peters5687ffe2001-02-28 16:44:18 +00002988DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002989init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002990{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002991 PyObject* m;
2992 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002993 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002994
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002995 /* Patch object types */
2996 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002997 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002998
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002999 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003000 d = PyModule_GetDict(m);
3001
Fredrik Lundh21009b92001-09-18 18:47:09 +00003002 x = PyInt_FromLong(SRE_MAGIC);
3003 if (x) {
3004 PyDict_SetItemString(d, "MAGIC", x);
3005 Py_DECREF(x);
3006 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003007
Fredrik Lundh21009b92001-09-18 18:47:09 +00003008 x = PyString_FromString(copyright);
3009 if (x) {
3010 PyDict_SetItemString(d, "copyright", x);
3011 Py_DECREF(x);
3012 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003013}
3014
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003015#endif /* !defined(SRE_RECURSIVE) */