blob: 640f388bdc98a8a06acd2c5393686d84a98db98c [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-06-30 fl added fast search optimization
 * 2000-06-30 fl added assert (lookahead) primitives, etc
 * 2000-07-02 fl added charset optimizations, etc
 * 2000-07-03 fl store code in pattern object, lookbehind, etc
 * 2000-07-08 fl added regs attribute
 * 2000-07-21 fl reset lastindex in scanner methods
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-03 fl added recursion limit
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-08-08 fl changed findall to return empty strings instead of None
 * 2000-08-27 fl properly propagate memory errors
 * 2000-09-02 fl return -1 instead of None for start/end/span
 * 2000-09-20 fl added expand method
 * 2000-09-21 fl don't use the buffer interface for unicode strings
 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl really fixed assert_not; reset groups in findall
 * 2000-12-21 fl fixed memory leak in groupdict
 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
 * 2001-01-16 fl fixed memory leak in pattern destructor
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-22 fl check for literal sub/subn templates
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
51
52#ifndef SRE_RECURSIVE
53
/* version/copyright banner string for this module */
static char copyright[] =
    " SRE 2.2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000056
57#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000058#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000059
60#include "sre.h"
61
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000062#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000063
/* name of this module, minus the leading underscore */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* defining this one enables tracing */
#undef VERBOSE

#if PY_VERSION_HEX >= 0x01060000
#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif
#endif

/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */

#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local helper function returning `type` */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (parenthesized so they expand safely inside expressions) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE((fmt, ...)) takes a parenthesized printf argument list; it
   expands to nothing unless VERBOSE is defined */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
132
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000133/* -------------------------------------------------------------------- */
134/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000135
/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII. create tables in init_sre() instead */

/* OR of the SRE_*_MASK bits for each character code 0..127 */
static const unsigned char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };

/* lowercase mapping for each character code 0..127 (only 'A'..'Z'
   map to something other than themselves) */
static const unsigned char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127 };

/* each predicate yields the (non-zero) mask bit when the character has
   the property and 0 otherwise; codes >= 128 never match.  note that
   the argument is evaluated twice and must not be negative */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* map a character code to lowercase using the table above; codes
   >= 128 are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? sre_char_lower[ch] : ch);
}
179
/* locale-specific character predicates */

/* these defer to <ctype.h> for codes below 256 and report "no match"
   for anything larger.  the argument is evaluated more than once and
   must not be negative */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* lowercase a character code via tolower(); codes >= 256 are returned
   unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int) tolower((int) ch) : ch);
}
192
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)

/* thin wrappers over the Py_UNICODE_* classification macros; the
   argument is converted to Py_UNICODE first */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* lowercase a character code via Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif
209
Guido van Rossumb700df92000-03-31 14:59:30 +0000210LOCAL(int)
211sre_category(SRE_CODE category, unsigned int ch)
212{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000213 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000214
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000215 case SRE_CATEGORY_DIGIT:
216 return SRE_IS_DIGIT(ch);
217 case SRE_CATEGORY_NOT_DIGIT:
218 return !SRE_IS_DIGIT(ch);
219 case SRE_CATEGORY_SPACE:
220 return SRE_IS_SPACE(ch);
221 case SRE_CATEGORY_NOT_SPACE:
222 return !SRE_IS_SPACE(ch);
223 case SRE_CATEGORY_WORD:
224 return SRE_IS_WORD(ch);
225 case SRE_CATEGORY_NOT_WORD:
226 return !SRE_IS_WORD(ch);
227 case SRE_CATEGORY_LINEBREAK:
228 return SRE_IS_LINEBREAK(ch);
229 case SRE_CATEGORY_NOT_LINEBREAK:
230 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 case SRE_CATEGORY_LOC_WORD:
233 return SRE_LOC_IS_WORD(ch);
234 case SRE_CATEGORY_LOC_NOT_WORD:
235 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000236
237#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000238 case SRE_CATEGORY_UNI_DIGIT:
239 return SRE_UNI_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_NOT_DIGIT:
241 return !SRE_UNI_IS_DIGIT(ch);
242 case SRE_CATEGORY_UNI_SPACE:
243 return SRE_UNI_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_NOT_SPACE:
245 return !SRE_UNI_IS_SPACE(ch);
246 case SRE_CATEGORY_UNI_WORD:
247 return SRE_UNI_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_NOT_WORD:
249 return !SRE_UNI_IS_WORD(ch);
250 case SRE_CATEGORY_UNI_LINEBREAK:
251 return SRE_UNI_IS_LINEBREAK(ch);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
253 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000254#else
255 case SRE_CATEGORY_UNI_DIGIT:
256 return SRE_IS_DIGIT(ch);
257 case SRE_CATEGORY_UNI_NOT_DIGIT:
258 return !SRE_IS_DIGIT(ch);
259 case SRE_CATEGORY_UNI_SPACE:
260 return SRE_IS_SPACE(ch);
261 case SRE_CATEGORY_UNI_NOT_SPACE:
262 return !SRE_IS_SPACE(ch);
263 case SRE_CATEGORY_UNI_WORD:
264 return SRE_LOC_IS_WORD(ch);
265 case SRE_CATEGORY_UNI_NOT_WORD:
266 return !SRE_LOC_IS_WORD(ch);
267 case SRE_CATEGORY_UNI_LINEBREAK:
268 return SRE_IS_LINEBREAK(ch);
269 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
270 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000271#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000272 }
273 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000274}
275
276/* helpers */
277
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000279mark_fini(SRE_STATE* state)
280{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000281 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000282 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000283 state->mark_stack = NULL;
284 }
285 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000286}
287
288static int
289mark_save(SRE_STATE* state, int lo, int hi)
290{
291 void* stack;
292 int size;
293 int minsize, newsize;
294
295 if (hi <= lo)
296 return 0;
297
298 size = (hi - lo) + 1;
299
300 newsize = state->mark_stack_size;
301 minsize = state->mark_stack_base + size;
302
303 if (newsize < minsize) {
304 /* create new stack */
305 if (!newsize) {
306 newsize = 512;
307 if (newsize < minsize)
308 newsize = minsize;
309 TRACE(("allocate stack %d\n", newsize));
310 stack = malloc(sizeof(void*) * newsize);
311 } else {
312 /* grow the stack */
313 while (newsize < minsize)
314 newsize += newsize;
315 TRACE(("grow stack to %d\n", newsize));
316 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
317 }
318 if (!stack) {
319 mark_fini(state);
320 return SRE_ERROR_MEMORY;
321 }
322 state->mark_stack = stack;
323 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000324 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000325
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000326 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000327
328 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
329 size * sizeof(void*));
330
331 state->mark_stack_base += size;
332
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000333 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000334}
335
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000336static int
337mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000338{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000339 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000341 if (hi <= lo)
342 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000344 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000345
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000346 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000347
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000348 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000349
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000350 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
351 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000354}
355
Neal Norwitzaddfe0c2002-11-10 14:33:26 +0000356static void
357lastmark_restore(SRE_STATE *state, int lastmark)
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000358{
359 if (state->lastmark > lastmark) {
360 memset(
361 state->mark + lastmark + 1, 0,
362 (state->lastmark - lastmark) * sizeof(void*)
363 );
364 state->lastmark = lastmark;
365 state->lastindex = (lastmark == 0) ? -1 : (lastmark-1)/2+1;
366 }
367}
368
/* the engine below is written against the SRE_CHAR / SRE_* names.
   with unicode support enabled, this file first includes itself (with
   SRE_RECURSIVE set, so everything above is skipped) to compile the
   engine for 8-bit characters, then re-binds the names so that the
   remainder of this file compiles the unicode variant.  without
   unicode support, only the 8-bit bindings are used */

/* generate 8-bit version */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template

#if defined(HAVE_UNICODE)

#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
407#endif /* SRE_RECURSIVE */
408
/* -------------------------------------------------------------------- */
/* String matching engine */

/* the following section is compiled twice, with different character
   settings */
414
415LOCAL(int)
416SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
417{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000418 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000420 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000421
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000423
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000424 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000425 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000426 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000427
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000428 case SRE_AT_BEGINNING_LINE:
429 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000430 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000432 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000433 return (((void*) (ptr+1) == state->end &&
434 SRE_IS_LINEBREAK((int) ptr[0])) ||
435 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000436
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000437 case SRE_AT_END_LINE:
438 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000439 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000440
Fredrik Lundh770617b2001-01-14 15:06:11 +0000441 case SRE_AT_END_STRING:
442 return ((void*) ptr == state->end);
443
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 case SRE_AT_BOUNDARY:
445 if (state->beginning == state->end)
446 return 0;
447 that = ((void*) ptr > state->beginning) ?
448 SRE_IS_WORD((int) ptr[-1]) : 0;
449 this = ((void*) ptr < state->end) ?
450 SRE_IS_WORD((int) ptr[0]) : 0;
451 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000452
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000453 case SRE_AT_NON_BOUNDARY:
454 if (state->beginning == state->end)
455 return 0;
456 that = ((void*) ptr > state->beginning) ?
457 SRE_IS_WORD((int) ptr[-1]) : 0;
458 this = ((void*) ptr < state->end) ?
459 SRE_IS_WORD((int) ptr[0]) : 0;
460 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000461
462 case SRE_AT_LOC_BOUNDARY:
463 if (state->beginning == state->end)
464 return 0;
465 that = ((void*) ptr > state->beginning) ?
466 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
467 this = ((void*) ptr < state->end) ?
468 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
469 return this != that;
470
471 case SRE_AT_LOC_NON_BOUNDARY:
472 if (state->beginning == state->end)
473 return 0;
474 that = ((void*) ptr > state->beginning) ?
475 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
476 this = ((void*) ptr < state->end) ?
477 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
478 return this == that;
479
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000480#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000481 case SRE_AT_UNI_BOUNDARY:
482 if (state->beginning == state->end)
483 return 0;
484 that = ((void*) ptr > state->beginning) ?
485 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
486 this = ((void*) ptr < state->end) ?
487 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
488 return this != that;
489
490 case SRE_AT_UNI_NON_BOUNDARY:
491 if (state->beginning == state->end)
492 return 0;
493 that = ((void*) ptr > state->beginning) ?
494 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
495 this = ((void*) ptr < state->end) ?
496 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
497 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000498#endif
499
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000501
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000502 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000503}
504
505LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000506SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000507{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000508 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000509
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000510 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000511
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000512 for (;;) {
513 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000514
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000515 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000516 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000517 if (ch == set[0])
518 return ok;
519 set++;
520 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000521
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000522 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000523 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000524 if (set[0] <= ch && ch <= set[1])
525 return ok;
526 set += 2;
527 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000528
Fredrik Lundh3562f112000-07-02 12:00:07 +0000529 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000530 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000531 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
532 return ok;
533 set += 16;
534 break;
535
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000536 case SRE_OP_BIGCHARSET:
537 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
538 {
539 int count, block;
540 count = *(set++);
541 block = ((unsigned char*)set)[ch >> 8];
542 set += 128;
543 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
544 return ok;
545 set += count*16;
546 break;
547 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000548
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000549 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000550 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000551 if (sre_category(set[0], (int) ch))
552 return ok;
553 set += 1;
554 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000555
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000556 case SRE_OP_NEGATE:
557 ok = !ok;
558 break;
559
560 case SRE_OP_FAILURE:
561 return !ok;
562
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000563 default:
564 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000565 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000566 return 0;
567 }
568 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000569}
570
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000571LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
572
573LOCAL(int)
574SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
575{
576 SRE_CODE chr;
577 SRE_CHAR* ptr = state->ptr;
578 SRE_CHAR* end = state->end;
579 int i;
580
581 /* adjust end */
582 if (maxcount < end - ptr && maxcount != 65535)
583 end = ptr + maxcount;
584
585 switch (pattern[0]) {
586
587 case SRE_OP_ANY:
588 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000589 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000590 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
591 ptr++;
592 break;
593
594 case SRE_OP_ANY_ALL:
595 /* repeated dot wildcare. skip to the end of the target
596 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000597 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000598 ptr = end;
599 break;
600
601 case SRE_OP_LITERAL:
602 /* repeated literal */
603 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000604 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000605 while (ptr < end && (SRE_CODE) *ptr == chr)
606 ptr++;
607 break;
608
609 case SRE_OP_LITERAL_IGNORE:
610 /* repeated literal */
611 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000613 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
614 ptr++;
615 break;
616
617 case SRE_OP_NOT_LITERAL:
618 /* repeated non-literal */
619 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000620 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000621 while (ptr < end && (SRE_CODE) *ptr != chr)
622 ptr++;
623 break;
624
625 case SRE_OP_NOT_LITERAL_IGNORE:
626 /* repeated non-literal */
627 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000628 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000629 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
630 ptr++;
631 break;
632
633 case SRE_OP_IN:
634 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000635 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
636 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000637 ptr++;
638 break;
639
640 default:
641 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000642 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000643 while ((SRE_CHAR*) state->ptr < end) {
644 i = SRE_MATCH(state, pattern, level);
645 if (i < 0)
646 return i;
647 if (!i)
648 break;
649 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000650 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
651 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000652 return (SRE_CHAR*) state->ptr - ptr;
653 }
654
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000655 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000656 return ptr - (SRE_CHAR*) state->ptr;
657}
658
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    /* NOTE(review): the prefix comparison below reads ptr[i] without
       its own bounds check; presumably the minimal length test above
       covers it -- verify before enabling this code */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000686
687LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000688SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000689{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000690 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000691 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000692
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000693 SRE_CHAR* end = state->end;
694 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000696 SRE_REPEAT* rp;
697 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000698 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000699
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000700 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000701
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000702 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000703
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000704#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000705 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000706 return SRE_ERROR_RECURSION_LIMIT;
707#endif
708
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000709#if defined(USE_RECURSION_LIMIT)
710 if (level > USE_RECURSION_LIMIT)
711 return SRE_ERROR_RECURSION_LIMIT;
712#endif
713
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000714 if (pattern[0] == SRE_OP_INFO) {
715 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000716 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000717 if (pattern[3] && (end - ptr) < pattern[3]) {
718 TRACE(("reject (got %d chars, need %d)\n",
719 (end - ptr), pattern[3]));
720 return 0;
721 }
722 pattern += pattern[1] + 1;
723 }
724
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000728
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 case SRE_OP_FAILURE:
730 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000731 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000732 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000733
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 case SRE_OP_SUCCESS:
735 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000736 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000737 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000738 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000739
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 case SRE_OP_AT:
741 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000742 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000743 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000744 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000745 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 pattern++;
747 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000749 case SRE_OP_CATEGORY:
750 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000751 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000752 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000754 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000756 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 case SRE_OP_LITERAL:
760 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000761 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000762 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000763 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000764 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000765 pattern++;
766 ptr++;
767 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000768
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000769 case SRE_OP_NOT_LITERAL:
770 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000771 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000772 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000773 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000774 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000775 pattern++;
776 ptr++;
777 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000778
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000779 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000780 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000781 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000782 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000783 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
784 return 0;
785 ptr++;
786 break;
787
788 case SRE_OP_ANY_ALL:
789 /* match anything */
790 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000791 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000793 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000794 ptr++;
795 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000796
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000797 case SRE_OP_IN:
798 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000799 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000800 TRACE(("|%p|%p|IN\n", pattern, ptr));
801 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000802 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000803 pattern += pattern[0];
804 ptr++;
805 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000806
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000807 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000808 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000809 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000810 i = pattern[0];
811 {
812 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
813 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
814 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000815 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 while (p < e) {
817 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000818 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000819 p++; ptr++;
820 }
821 }
822 pattern++;
823 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000824
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000825 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000826 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000827 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000828 i = pattern[0];
829 {
830 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
831 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
832 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000833 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000834 while (p < e) {
835 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000836 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000837 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000838 p++; ptr++;
839 }
840 }
841 pattern++;
842 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000843
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000844 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000845 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000846 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000847 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000848 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000849 pattern++;
850 ptr++;
851 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000852
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000853 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000854 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000855 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000856 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000857 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000858 pattern++;
859 ptr++;
860 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000862 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000863 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000864 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000865 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000866 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000867 pattern += pattern[0];
868 ptr++;
869 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000870
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000871 case SRE_OP_MARK:
872 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000873 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000874 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000875 i = pattern[0];
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000876 if (i > state->lastmark) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000877 state->lastmark = i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000878 if (i & 1)
879 state->lastindex = i/2 + 1;
880 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000881 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000882 pattern++;
883 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000884
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000885 case SRE_OP_JUMP:
886 case SRE_OP_INFO:
887 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000888 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000889 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000890 pattern += pattern[0];
891 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000892
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000893 case SRE_OP_ASSERT:
894 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000895 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000896 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000897 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000898 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000899 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000900 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000901 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000902 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000903 pattern += pattern[0];
904 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000905
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000906 case SRE_OP_ASSERT_NOT:
907 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000908 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000909 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000910 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000911 if (state->ptr >= state->beginning) {
912 i = SRE_MATCH(state, pattern + 2, level + 1);
913 if (i < 0)
914 return i;
915 if (i)
916 return 0;
917 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000918 pattern += pattern[0];
919 break;
920
921 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000922 /* alternation */
923 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000924 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000925 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000926 for (; pattern[0]; pattern += pattern[0]) {
927 if (pattern[1] == SRE_OP_LITERAL &&
928 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
929 continue;
930 if (pattern[1] == SRE_OP_IN &&
931 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
932 continue;
933 state->ptr = ptr;
934 i = SRE_MATCH(state, pattern + 1, level + 1);
935 if (i)
936 return i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000937 lastmark_restore(state, lastmark);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000938 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000939 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000940
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000941 case SRE_OP_REPEAT_ONE:
942 /* match repeated sequence (maximizing regexp) */
943
944 /* this operator only works if the repeated item is
945 exactly one character wide, and we're not already
946 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000947 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000948
949 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
950
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000951 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000952 pattern[1], pattern[2]));
953
Fredrik Lundhe1869832000-08-01 22:47:49 +0000954 if (ptr + pattern[1] > end)
955 return 0; /* cannot match */
956
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000957 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000958
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000959 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
960 if (count < 0)
961 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000962
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000963 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000964
965 /* when we arrive here, count contains the number of
966 matches, and ptr points to the tail of the target
967 string. check if the rest of the pattern matches,
968 and backtrack if not. */
969
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000970 if (count < (int) pattern[1])
971 return 0;
972
973 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
974 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000975 state->ptr = ptr;
976 return 1;
977
978 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
979 /* tail starts with a literal. skip positions where
980 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000981 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000982 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000983 while (count >= (int) pattern[1] &&
984 (ptr >= end || *ptr != chr)) {
985 ptr--;
986 count--;
987 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000988 if (count < (int) pattern[1])
989 break;
990 state->ptr = ptr;
991 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000992 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000993 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000994 ptr--;
995 count--;
996 }
997
998 } else {
999 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001000 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001001 while (count >= (int) pattern[1]) {
1002 state->ptr = ptr;
1003 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001004 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +00001005 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001006 ptr--;
1007 count--;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +00001008 lastmark_restore(state, lastmark);
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001009 }
1010 }
1011 return 0;
1012
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001013 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001014 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001015 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001016 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001017 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001018 pattern[1], pattern[2]));
1019
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001020 rep.count = -1;
1021 rep.pattern = pattern;
1022
1023 /* install new repeat context */
1024 rep.prev = state->repeat;
1025 state->repeat = &rep;
1026
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001027 state->ptr = ptr;
1028 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001029
1030 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001031
1032 return i;
1033
1034 case SRE_OP_MAX_UNTIL:
1035 /* maximizing repeat */
1036 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1037
1038 /* FIXME: we probably need to deal with zero-width
1039 matches in here... */
1040
1041 rp = state->repeat;
1042 if (!rp)
1043 return SRE_ERROR_STATE;
1044
1045 state->ptr = ptr;
1046
1047 count = rp->count + 1;
1048
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001049 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001050
1051 if (count < rp->pattern[1]) {
1052 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001053 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001054 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001055 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001056 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001057 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001058 rp->count = count - 1;
1059 state->ptr = ptr;
1060 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001061 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001062
1063 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001064 /* we may have enough matches, but if we can
1065 match another item, do so */
1066 rp->count = count;
1067 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001068 i = mark_save(state, 0, lastmark);
1069 if (i < 0)
1070 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001071 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001072 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001073 if (i)
1074 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001075 i = mark_restore(state, 0, lastmark);
1076 if (i < 0)
1077 return i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +00001078 lastmark_restore(state, lastmark);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001079 rp->count = count - 1;
1080 state->ptr = ptr;
1081 }
1082
1083 /* cannot match more repeated items here. make sure the
1084 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001085 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001086 i = SRE_MATCH(state, pattern, level + 1);
1087 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001088 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001089 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001090 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001091 return 0;
1092
1093 case SRE_OP_MIN_UNTIL:
1094 /* minimizing repeat */
1095 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1096
1097 rp = state->repeat;
1098 if (!rp)
1099 return SRE_ERROR_STATE;
1100
1101 count = rp->count + 1;
1102
Fredrik Lundh770617b2001-01-14 15:06:11 +00001103 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1104 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001105
1106 state->ptr = ptr;
1107
1108 if (count < rp->pattern[1]) {
1109 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001110 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001111 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001112 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001113 if (i)
1114 return i;
1115 rp->count = count-1;
1116 state->ptr = ptr;
1117 return 0;
1118 }
1119
1120 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001121 state->repeat = rp->prev;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001122 i = SRE_MATCH(state, pattern, level + 1);
1123 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001124 return i;
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001125
Fredrik Lundh770617b2001-01-14 15:06:11 +00001126 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001127 state->repeat = rp;
1128
1129 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1130 return 0;
1131
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001132 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001133 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001134 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001135 if (i)
1136 return i;
1137 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001138 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001139 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001140
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001141 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001142 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001143 return SRE_ERROR_ILLEGAL;
1144 }
1145 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001146
Sjoerd Mullender89dfe9e2001-08-30 14:37:07 +00001147 /* can't end up here */
Fredrik Lundh21009b92001-09-18 18:47:09 +00001148 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
Guido van Rossumb700df92000-03-31 14:59:30 +00001149}
1150
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001151LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001152SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1153{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001154 SRE_CHAR* ptr = state->start;
1155 SRE_CHAR* end = state->end;
1156 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001157 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001158 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001159 SRE_CODE* prefix = NULL;
1160 SRE_CODE* charset = NULL;
1161 SRE_CODE* overlap = NULL;
1162 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001163
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001164 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001165 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001166 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001167
1168 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001169
1170 if (pattern[3] > 0) {
1171 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001172 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001173 end -= pattern[3]-1;
1174 if (end <= ptr)
1175 end = ptr+1;
1176 }
1177
Fredrik Lundh3562f112000-07-02 12:00:07 +00001178 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001179 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001180 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001181 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001182 prefix_skip = pattern[6];
1183 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001184 overlap = prefix + prefix_len - 1;
1185 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001186 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001187 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001188 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001189
1190 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001191 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001192
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001193 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1194 TRACE(("charset = %p\n", charset));
1195
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001196#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001197 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001198 /* pattern starts with a known prefix. use the overlap
1199 table to skip forward as fast as we possibly can */
1200 int i = 0;
1201 end = state->end;
1202 while (ptr < end) {
1203 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001204 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001205 if (!i)
1206 break;
1207 else
1208 i = overlap[i];
1209 } else {
1210 if (++i == prefix_len) {
1211 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001212 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1213 state->start = ptr + 1 - prefix_len;
1214 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001215 if (flags & SRE_INFO_LITERAL)
1216 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001217 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001218 if (status != 0)
1219 return status;
1220 /* close but no cigar -- try again */
1221 i = overlap[i];
1222 }
1223 break;
1224 }
1225
1226 }
1227 ptr++;
1228 }
1229 return 0;
1230 }
1231#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001232
Fredrik Lundh3562f112000-07-02 12:00:07 +00001233 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001234 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001235 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001236 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001237 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001238 for (;;) {
1239 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1240 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001241 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001243 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001244 state->start = ptr;
1245 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001246 if (flags & SRE_INFO_LITERAL)
1247 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001248 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001249 if (status != 0)
1250 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001251 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001252 } else if (charset) {
1253 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001254 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001255 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001256 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001257 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001258 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001259 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001260 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001261 state->start = ptr;
1262 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001263 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001264 if (status != 0)
1265 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001266 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001267 }
1268 } else
1269 /* general case */
1270 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001271 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001272 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001273 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001274 if (status != 0)
1275 break;
1276 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001277
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001278 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001279}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001280
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001281LOCAL(int)
1282SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1283{
1284 /* check if given string is a literal template (i.e. no escapes) */
1285 while (len-- > 0)
1286 if (*ptr++ == '\\')
1287 return 0;
1288 return 1;
1289}
Guido van Rossumb700df92000-03-31 14:59:30 +00001290
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001291#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001292
1293/* -------------------------------------------------------------------- */
1294/* factories and destructors */
1295
1296/* see sre.h for object declarations */
1297
Jeremy Hylton938ace62002-07-17 16:30:39 +00001298static PyTypeObject Pattern_Type;
1299static PyTypeObject Match_Type;
1300static PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001301
1302static PyObject *
1303_compile(PyObject* self_, PyObject* args)
1304{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001305 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001306
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001307 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001308 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001309
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001310 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001311 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001312 PyObject* code;
1313 int groups = 0;
1314 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001315 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001316 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1317 &PyList_Type, &code, &groups,
1318 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001319 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001320
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001321 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001322
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001323 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001324 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001325 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001326
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001327 self->codesize = n;
1328
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001329 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001330 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001331 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001332 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001333
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001334 if (PyErr_Occurred()) {
1335 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001336 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001337 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001338
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001339 Py_INCREF(pattern);
1340 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001341
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001342 self->flags = flags;
1343
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001344 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001345
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001346 Py_XINCREF(groupindex);
1347 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001348
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001349 Py_XINCREF(indexgroup);
1350 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001352 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001353}
1354
1355static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001356sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001357{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001358 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001359}
1360
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001361static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001362sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001363{
1364 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001365 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001366 return NULL;
1367 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001368 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001369 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001370#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001371 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001372#else
1373 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001374#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001375 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001376}
1377
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001378LOCAL(void)
1379state_reset(SRE_STATE* state)
1380{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001381 state->lastmark = 0;
1382
1383 /* FIXME: dynamic! */
Neal Norwitz35fc7602002-06-13 21:11:11 +00001384 memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001385
1386 state->lastindex = -1;
1387
1388 state->repeat = NULL;
1389
1390 mark_fini(state);
1391}
1392
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001393static void*
1394getstring(PyObject* string, int* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001395{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001396 /* given a python object, return a data pointer, a length (in
1397 characters), and a character size. return NULL if the object
1398 is not a string (or not compatible) */
1399
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001400 PyBufferProcs *buffer;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001401 int size, bytes, charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001402 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001403
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001404#if defined(HAVE_UNICODE)
1405 if (PyUnicode_Check(string)) {
1406 /* unicode strings doesn't always support the buffer interface */
1407 ptr = (void*) PyUnicode_AS_DATA(string);
1408 bytes = PyUnicode_GET_DATA_SIZE(string);
1409 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001410 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001411
1412 } else {
1413#endif
1414
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001415 /* get pointer to string buffer */
1416 buffer = string->ob_type->tp_as_buffer;
1417 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1418 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001419 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001420 return NULL;
1421 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001423 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001424 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1425 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001426 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1427 return NULL;
1428 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001430 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001431#if PY_VERSION_HEX >= 0x01060000
1432 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001433#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001434 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001435#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001436
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001437 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001438 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001439#if defined(HAVE_UNICODE)
1440 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001441 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001442#endif
1443 else {
1444 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1445 return NULL;
1446 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001447
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001448#if defined(HAVE_UNICODE)
1449 }
1450#endif
1451
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001452 *p_length = size;
1453 *p_charsize = charsize;
1454
1455 return ptr;
1456}
1457
1458LOCAL(PyObject*)
1459state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1460 int start, int end)
1461{
1462 /* prepare state object */
1463
1464 int length;
1465 int charsize;
1466 void* ptr;
1467
1468 memset(state, 0, sizeof(SRE_STATE));
1469
1470 state->lastindex = -1;
1471
1472 ptr = getstring(string, &length, &charsize);
1473 if (!ptr)
1474 return NULL;
1475
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001476 /* adjust boundaries */
1477 if (start < 0)
1478 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001479 else if (start > length)
1480 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001481
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001482 if (end < 0)
1483 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001484 else if (end > length)
1485 end = length;
1486
1487 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001488
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001489 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001490
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001491 state->start = (void*) ((char*) ptr + start * state->charsize);
1492 state->end = (void*) ((char*) ptr + end * state->charsize);
1493
1494 Py_INCREF(string);
1495 state->string = string;
1496 state->pos = start;
1497 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001498
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001499 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001500 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001501 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001502#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001503 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001504#else
1505 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001506#endif
1507 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001508 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001509
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001510 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001511}
1512
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001513LOCAL(void)
1514state_fini(SRE_STATE* state)
1515{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001516 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001517 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001518}
1519
/* convert a pointer into the string data to a character index: the
   byte distance from the beginning of the string, divided by the
   character size */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1523
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001524LOCAL(PyObject*)
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001525state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001526{
Fredrik Lundh58100642000-08-09 09:14:35 +00001527 int i, j;
1528
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001529 index = (index - 1) * 2;
1530
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001531 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001532 if (empty)
1533 /* want empty string */
1534 i = j = 0;
1535 else {
1536 Py_INCREF(Py_None);
1537 return Py_None;
1538 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001539 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001540 i = STATE_OFFSET(state, state->mark[index]);
1541 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001542 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001543
Fredrik Lundh58100642000-08-09 09:14:35 +00001544 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001545}
1546
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001547static void
1548pattern_error(int status)
1549{
1550 switch (status) {
1551 case SRE_ERROR_RECURSION_LIMIT:
1552 PyErr_SetString(
1553 PyExc_RuntimeError,
1554 "maximum recursion limit exceeded"
1555 );
1556 break;
1557 case SRE_ERROR_MEMORY:
1558 PyErr_NoMemory();
1559 break;
1560 default:
1561 /* other error codes indicate compiler/engine bugs */
1562 PyErr_SetString(
1563 PyExc_RuntimeError,
1564 "internal error in regular expression engine"
1565 );
1566 }
1567}
1568
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001569static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001570pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001571{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001573
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001574 MatchObject* match;
1575 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001576 char* base;
1577 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001578
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001579 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001580
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001581 /* create match object (with room for extra group marks) */
1582 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001583 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001584 if (!match)
1585 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001586
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001587 Py_INCREF(pattern);
1588 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001589
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001590 Py_INCREF(state->string);
1591 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001592
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001593 match->regs = NULL;
1594 match->groups = pattern->groups+1;
1595
1596 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001597
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001598 base = (char*) state->beginning;
1599 n = state->charsize;
1600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001601 match->mark[0] = ((char*) state->start - base) / n;
1602 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001603
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001604 for (i = j = 0; i < pattern->groups; i++, j+=2)
1605 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1606 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1607 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1608 } else
1609 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1610
1611 match->pos = state->pos;
1612 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001613
Fredrik Lundh6f013982000-07-03 18:44:21 +00001614 match->lastindex = state->lastindex;
1615
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001616 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001617
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001618 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001619
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001620 /* no match */
1621 Py_INCREF(Py_None);
1622 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001623
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001624 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001625
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001626 /* internal error */
1627 pattern_error(status);
1628 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001629}
1630
1631static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001632pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001633{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001634 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001635
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001636 ScannerObject* self;
1637
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001638 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001639 int start = 0;
1640 int end = INT_MAX;
1641 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1642 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001643
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001644 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001645 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001646 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001647 return NULL;
1648
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001649 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001650 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001651 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001652 return NULL;
1653 }
1654
1655 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001656 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001657
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001658 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001659}
1660
Guido van Rossumb700df92000-03-31 14:59:30 +00001661static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001662pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001663{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001664 Py_XDECREF(self->pattern);
1665 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001666 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001667 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001668}
1669
1670static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001671pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001672{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001673 SRE_STATE state;
1674 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001676 PyObject* string;
1677 int start = 0;
1678 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001679 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1680 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1681 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001682 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001683
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001684 string = state_init(&state, self, string, start, end);
1685 if (!string)
1686 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001687
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001688 state.ptr = state.start;
1689
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001690 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1691
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001692 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001693 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001694 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001695#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001696 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001697#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001698 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001699
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001700 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1701
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001703
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001704 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001705}
1706
1707static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001708pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001709{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001710 SRE_STATE state;
1711 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001712
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001713 PyObject* string;
1714 int start = 0;
1715 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001716 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1717 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1718 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001719 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001720
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001721 string = state_init(&state, self, string, start, end);
1722 if (!string)
1723 return NULL;
1724
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001725 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001727 if (state.charsize == 1) {
1728 status = sre_search(&state, PatternObject_GetCode(self));
1729 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001730#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001731 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001732#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001733 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001734
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001735 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1736
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001737 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001738
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001739 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001740}
1741
1742static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001743call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001744{
1745 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001746 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001747 PyObject* func;
1748 PyObject* result;
1749
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001750 if (!args)
1751 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001752 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001753 if (!name)
1754 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001755 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001756 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001757 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001758 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001759 func = PyObject_GetAttrString(mod, function);
1760 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001761 if (!func)
1762 return NULL;
1763 result = PyObject_CallObject(func, args);
1764 Py_DECREF(func);
1765 Py_DECREF(args);
1766 return result;
1767}
1768
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object with the result of copy.deepcopy(*object, memo).
       returns 1 on success.  on failure, returns 0 and leaves *object
       alone */

    PyObject* copy_args;
    PyObject* copy;

    /* no need to check copy_args here; call deals with NULL */
    copy_args = Py_BuildValue("OO", *object, memo);

    copy = call("copy", "deepcopy", copy_args);
    if (copy) {
        /* swap in the copy, and drop the original */
        Py_DECREF(*object);
        *object = copy;
        return 1; /* success */
    }

    return 0;
}
#endif
1788
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001789static PyObject*
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00001790join_list(PyObject* list, PyObject* pattern)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001791{
1792 /* join list elements */
1793
1794 PyObject* joiner;
1795#if PY_VERSION_HEX >= 0x01060000
1796 PyObject* function;
1797 PyObject* args;
1798#endif
1799 PyObject* result;
1800
1801 switch (PyList_GET_SIZE(list)) {
1802 case 0:
1803 Py_DECREF(list);
1804 return PyString_FromString("");
1805 case 1:
1806 result = PyList_GET_ITEM(list, 0);
1807 Py_INCREF(result);
1808 Py_DECREF(list);
1809 return result;
1810 }
1811
1812 /* two or more elements: slice out a suitable separator from the
1813 first member, and use that to join the entire list */
1814
1815 joiner = PySequence_GetSlice(pattern, 0, 0);
1816 if (!joiner)
1817 return NULL;
1818
1819#if PY_VERSION_HEX >= 0x01060000
1820 function = PyObject_GetAttrString(joiner, "join");
1821 if (!function) {
1822 Py_DECREF(joiner);
1823 return NULL;
1824 }
1825 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001826 if (!args) {
1827 Py_DECREF(function);
1828 Py_DECREF(joiner);
1829 return NULL;
1830 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001831 PyTuple_SET_ITEM(args, 0, list);
1832 result = PyObject_CallObject(function, args);
1833 Py_DECREF(args); /* also removes list */
1834 Py_DECREF(function);
1835#else
1836 result = call(
1837 "string", "join",
1838 Py_BuildValue("OO", list, joiner)
1839 );
1840#endif
1841 Py_DECREF(joiner);
1842
1843 return result;
1844}
1845
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001846static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001847pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001848{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001849 SRE_STATE state;
1850 PyObject* list;
1851 int status;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001852 int i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00001853
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001854 PyObject* string;
1855 int start = 0;
1856 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001857 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1858 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1859 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001860 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001861
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001862 string = state_init(&state, self, string, start, end);
1863 if (!string)
1864 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001865
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001866 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001867 if (!list) {
1868 state_fini(&state);
1869 return NULL;
1870 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001871
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001873
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001874 PyObject* item;
1875
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001876 state_reset(&state);
1877
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001878 state.ptr = state.start;
1879
1880 if (state.charsize == 1) {
1881 status = sre_search(&state, PatternObject_GetCode(self));
1882 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001883#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001884 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001885#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001886 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001887
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001888 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001889 if (status == 0)
1890 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001891 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001892 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001894
1895 /* don't bother to build a match object */
1896 switch (self->groups) {
1897 case 0:
1898 b = STATE_OFFSET(&state, state.start);
1899 e = STATE_OFFSET(&state, state.ptr);
1900 item = PySequence_GetSlice(string, b, e);
1901 if (!item)
1902 goto error;
1903 break;
1904 case 1:
1905 item = state_getslice(&state, 1, string, 1);
1906 if (!item)
1907 goto error;
1908 break;
1909 default:
1910 item = PyTuple_New(self->groups);
1911 if (!item)
1912 goto error;
1913 for (i = 0; i < self->groups; i++) {
1914 PyObject* o = state_getslice(&state, i+1, string, 1);
1915 if (!o) {
1916 Py_DECREF(item);
1917 goto error;
1918 }
1919 PyTuple_SET_ITEM(item, i, o);
1920 }
1921 break;
1922 }
1923
1924 status = PyList_Append(list, item);
1925 Py_DECREF(item);
1926 if (status < 0)
1927 goto error;
1928
1929 if (state.ptr == state.start)
1930 state.start = (void*) ((char*) state.ptr + state.charsize);
1931 else
1932 state.start = state.ptr;
1933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001935
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001936 state_fini(&state);
1937 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001938
1939error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001940 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001941 state_fini(&state);
1942 return NULL;
1943
Guido van Rossumb700df92000-03-31 14:59:30 +00001944}
1945
#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    /* return a callable-iterator that calls the search method of a
       new scanner object, with None as the sentinel value */

    PyObject* scanner;
    PyObject* search;
    PyObject* iterator = NULL;

    scanner = pattern_scanner(pattern, args);
    if (!scanner)
        return NULL;

    /* our own scanner reference isn't needed once we have the method */
    search = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);

    if (search) {
        iterator = PyCallIter_New(search, Py_None);
        Py_DECREF(search);
    }

    return iterator;
}
#endif
1969
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001970static PyObject*
1971pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
1972{
1973 SRE_STATE state;
1974 PyObject* list;
1975 PyObject* item;
1976 int status;
1977 int n;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001978 int i;
1979 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001980
1981 PyObject* string;
1982 int maxsplit = 0;
1983 static char* kwlist[] = { "source", "maxsplit", NULL };
1984 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
1985 &string, &maxsplit))
1986 return NULL;
1987
1988 string = state_init(&state, self, string, 0, INT_MAX);
1989 if (!string)
1990 return NULL;
1991
1992 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001993 if (!list) {
1994 state_fini(&state);
1995 return NULL;
1996 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001997
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001998 n = 0;
1999 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002000
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002001 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002002
2003 state_reset(&state);
2004
2005 state.ptr = state.start;
2006
2007 if (state.charsize == 1) {
2008 status = sre_search(&state, PatternObject_GetCode(self));
2009 } else {
2010#if defined(HAVE_UNICODE)
2011 status = sre_usearch(&state, PatternObject_GetCode(self));
2012#endif
2013 }
2014
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002015 if (status <= 0) {
2016 if (status == 0)
2017 break;
2018 pattern_error(status);
2019 goto error;
2020 }
2021
2022 if (state.start == state.ptr) {
2023 if (last == state.end)
2024 break;
2025 /* skip one character */
2026 state.start = (void*) ((char*) state.ptr + state.charsize);
2027 continue;
2028 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002029
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002030 /* get segment before this match */
2031 item = PySequence_GetSlice(
2032 string, STATE_OFFSET(&state, last),
2033 STATE_OFFSET(&state, state.start)
2034 );
2035 if (!item)
2036 goto error;
2037 status = PyList_Append(list, item);
2038 Py_DECREF(item);
2039 if (status < 0)
2040 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002041
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002042 /* add groups (if any) */
2043 for (i = 0; i < self->groups; i++) {
2044 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002045 if (!item)
2046 goto error;
2047 status = PyList_Append(list, item);
2048 Py_DECREF(item);
2049 if (status < 0)
2050 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002051 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002052
2053 n = n + 1;
2054
2055 last = state.start = state.ptr;
2056
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002057 }
2058
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002059 /* get segment following last match (even if empty) */
2060 item = PySequence_GetSlice(
2061 string, STATE_OFFSET(&state, last), state.endpos
2062 );
2063 if (!item)
2064 goto error;
2065 status = PyList_Append(list, item);
2066 Py_DECREF(item);
2067 if (status < 0)
2068 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002069
2070 state_fini(&state);
2071 return list;
2072
2073error:
2074 Py_DECREF(list);
2075 state_fini(&state);
2076 return NULL;
2077
2078}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002079
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002080static PyObject*
2081pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2082 int count, int subn)
2083{
2084 SRE_STATE state;
2085 PyObject* list;
2086 PyObject* item;
2087 PyObject* filter;
2088 PyObject* args;
2089 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002090 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002091 int status;
2092 int n;
2093 int i, b, e;
2094 int filter_is_callable;
2095
Fredrik Lundhdac58492001-10-21 21:48:30 +00002096 if (PyCallable_Check(template)) {
2097 /* sub/subn takes either a function or a template */
2098 filter = template;
2099 Py_INCREF(filter);
2100 filter_is_callable = 1;
2101 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002102 /* if not callable, check if it's a literal string */
2103 int literal;
2104 ptr = getstring(template, &n, &b);
2105 if (ptr) {
2106 if (b == 1) {
2107 literal = sre_literal_template(ptr, n);
2108 } else {
2109#if defined(HAVE_UNICODE)
2110 literal = sre_uliteral_template(ptr, n);
2111#endif
2112 }
2113 } else {
2114 PyErr_Clear();
2115 literal = 0;
2116 }
2117 if (literal) {
2118 filter = template;
2119 Py_INCREF(filter);
2120 filter_is_callable = 0;
2121 } else {
2122 /* not a literal; hand it over to the template compiler */
2123 filter = call(
2124 SRE_MODULE, "_subx",
2125 Py_BuildValue("OO", self, template)
2126 );
2127 if (!filter)
2128 return NULL;
2129 filter_is_callable = PyCallable_Check(filter);
2130 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002131 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002132
2133 string = state_init(&state, self, string, 0, INT_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002134 if (!string) {
2135 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002136 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002137 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002138
2139 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002140 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002141 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002142 state_fini(&state);
2143 return NULL;
2144 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002145
2146 n = i = 0;
2147
2148 while (!count || n < count) {
2149
2150 state_reset(&state);
2151
2152 state.ptr = state.start;
2153
2154 if (state.charsize == 1) {
2155 status = sre_search(&state, PatternObject_GetCode(self));
2156 } else {
2157#if defined(HAVE_UNICODE)
2158 status = sre_usearch(&state, PatternObject_GetCode(self));
2159#endif
2160 }
2161
2162 if (status <= 0) {
2163 if (status == 0)
2164 break;
2165 pattern_error(status);
2166 goto error;
2167 }
2168
2169 b = STATE_OFFSET(&state, state.start);
2170 e = STATE_OFFSET(&state, state.ptr);
2171
2172 if (i < b) {
2173 /* get segment before this match */
2174 item = PySequence_GetSlice(string, i, b);
2175 if (!item)
2176 goto error;
2177 status = PyList_Append(list, item);
2178 Py_DECREF(item);
2179 if (status < 0)
2180 goto error;
2181
2182 } else if (i == b && i == e && n > 0)
2183 /* ignore empty match on latest position */
2184 goto next;
2185
2186 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002187 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002188 match = pattern_new_match(self, &state, 1);
2189 if (!match)
2190 goto error;
2191 args = Py_BuildValue("(O)", match);
2192 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002193 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002194 goto error;
2195 }
2196 item = PyObject_CallObject(filter, args);
2197 Py_DECREF(args);
2198 Py_DECREF(match);
2199 if (!item)
2200 goto error;
2201 } else {
2202 /* filter is literal string */
2203 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002204 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002205 }
2206
2207 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002208 if (item != Py_None) {
2209 status = PyList_Append(list, item);
2210 Py_DECREF(item);
2211 if (status < 0)
2212 goto error;
2213 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002214
2215 i = e;
2216 n = n + 1;
2217
2218next:
2219 /* move on */
2220 if (state.ptr == state.start)
2221 state.start = (void*) ((char*) state.ptr + state.charsize);
2222 else
2223 state.start = state.ptr;
2224
2225 }
2226
2227 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002228 if (i < state.endpos) {
2229 item = PySequence_GetSlice(string, i, state.endpos);
2230 if (!item)
2231 goto error;
2232 status = PyList_Append(list, item);
2233 Py_DECREF(item);
2234 if (status < 0)
2235 goto error;
2236 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002237
2238 state_fini(&state);
2239
Guido van Rossum4e173842001-12-07 04:25:10 +00002240 Py_DECREF(filter);
2241
Fredrik Lundhdac58492001-10-21 21:48:30 +00002242 /* convert list to single string (also removes list) */
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00002243 item = join_list(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002244
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002245 if (!item)
2246 return NULL;
2247
2248 if (subn)
2249 return Py_BuildValue("Ni", item, n);
2250
2251 return item;
2252
2253error:
2254 Py_DECREF(list);
2255 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002256 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002257 return NULL;
2258
2259}
2260
2261static PyObject*
2262pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2263{
2264 PyObject* template;
2265 PyObject* string;
2266 int count = 0;
2267 static char* kwlist[] = { "repl", "string", "count", NULL };
2268 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2269 &template, &string, &count))
2270 return NULL;
2271
2272 return pattern_subx(self, template, string, count, 0);
2273}
2274
2275static PyObject*
2276pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2277{
2278 PyObject* template;
2279 PyObject* string;
2280 int count = 0;
2281 static char* kwlist[] = { "repl", "string", "count", NULL };
2282 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2283 &template, &string, &count))
2284 return NULL;
2285
2286 return pattern_subx(self, template, string, count, 1);
2287}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002288
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002289static PyObject*
2290pattern_copy(PatternObject* self, PyObject* args)
2291{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002292#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002293 PatternObject* copy;
2294 int offset;
2295
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002296 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2297 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002298
2299 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2300 if (!copy)
2301 return NULL;
2302
2303 offset = offsetof(PatternObject, groups);
2304
2305 Py_XINCREF(self->groupindex);
2306 Py_XINCREF(self->indexgroup);
2307 Py_XINCREF(self->pattern);
2308
2309 memcpy((char*) copy + offset, (char*) self + offset,
2310 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2311
2312 return (PyObject*) copy;
2313#else
2314 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2315 return NULL;
2316#endif
2317}
2318
2319static PyObject*
2320pattern_deepcopy(PatternObject* self, PyObject* args)
2321{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002322#ifdef USE_BUILTIN_COPY
2323 PatternObject* copy;
2324
2325 PyObject* memo;
2326 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2327 return NULL;
2328
2329 copy = (PatternObject*) pattern_copy(self, Py_None);
2330 if (!copy)
2331 return NULL;
2332
2333 if (!deepcopy(&copy->groupindex, memo) ||
2334 !deepcopy(&copy->indexgroup, memo) ||
2335 !deepcopy(&copy->pattern, memo)) {
2336 Py_DECREF(copy);
2337 return NULL;
2338 }
2339
2340#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002341 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2342 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002343#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002344}
2345
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002346static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002347 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2348 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2349 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2350 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2351 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2352 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002353#if PY_VERSION_HEX >= 0x02020000
2354 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS},
2355#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002356 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002357 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2358 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002359 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002360};
2361
2362static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002363pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002364{
2365 PyObject* res;
2366
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002367 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002368
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002369 if (res)
2370 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002371
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002372 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002373
2374 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002375 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002376 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002377 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002378 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002379
2380 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002381 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002382
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002383 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002384 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002385
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002386 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002387 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002388 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002389 }
2390
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002391 PyErr_SetString(PyExc_AttributeError, name);
2392 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002393}
2394
2395statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002396 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002397 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002398 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002399 (destructor)pattern_dealloc, /*tp_dealloc*/
2400 0, /*tp_print*/
2401 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002402};
2403
2404/* -------------------------------------------------------------------- */
2405/* match methods */
2406
2407static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002408match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002409{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002410 Py_XDECREF(self->regs);
2411 Py_XDECREF(self->string);
2412 Py_DECREF(self->pattern);
2413 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002414}
2415
2416static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002417match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002418{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002419 if (index < 0 || index >= self->groups) {
2420 /* raise IndexError if we were given a bad group number */
2421 PyErr_SetString(
2422 PyExc_IndexError,
2423 "no such group"
2424 );
2425 return NULL;
2426 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002427
Fredrik Lundh6f013982000-07-03 18:44:21 +00002428 index *= 2;
2429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002430 if (self->string == Py_None || self->mark[index] < 0) {
2431 /* return default value if the string or group is undefined */
2432 Py_INCREF(def);
2433 return def;
2434 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002435
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002436 return PySequence_GetSlice(
2437 self->string, self->mark[index], self->mark[index+1]
2438 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002439}
2440
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002441static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002442match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002443{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002444 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002445
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002446 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002447 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002448
Fredrik Lundh6f013982000-07-03 18:44:21 +00002449 i = -1;
2450
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002451 if (self->pattern->groupindex) {
2452 index = PyObject_GetItem(self->pattern->groupindex, index);
2453 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002454 if (PyInt_Check(index))
2455 i = (int) PyInt_AS_LONG(index);
2456 Py_DECREF(index);
2457 } else
2458 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002459 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002460
2461 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002462}
2463
2464static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002465match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002466{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002467 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002468}
2469
2470static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002471match_expand(MatchObject* self, PyObject* args)
2472{
2473 PyObject* template;
2474 if (!PyArg_ParseTuple(args, "O:expand", &template))
2475 return NULL;
2476
2477 /* delegate to Python code */
2478 return call(
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002479 SRE_MODULE, "_expand",
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002480 Py_BuildValue("OOO", self->pattern, self, template)
2481 );
2482}
2483
2484static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002485match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002486{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002487 PyObject* result;
2488 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002489
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002490 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002491
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002492 switch (size) {
2493 case 0:
2494 result = match_getslice(self, Py_False, Py_None);
2495 break;
2496 case 1:
2497 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2498 break;
2499 default:
2500 /* fetch multiple items */
2501 result = PyTuple_New(size);
2502 if (!result)
2503 return NULL;
2504 for (i = 0; i < size; i++) {
2505 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002506 self, PyTuple_GET_ITEM(args, i), Py_None
2507 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002508 if (!item) {
2509 Py_DECREF(result);
2510 return NULL;
2511 }
2512 PyTuple_SET_ITEM(result, i, item);
2513 }
2514 break;
2515 }
2516 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002517}
2518
2519static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002520match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002521{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002522 PyObject* result;
2523 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002524
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002525 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002526 static char* kwlist[] = { "default", NULL };
2527 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002528 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002529
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002530 result = PyTuple_New(self->groups-1);
2531 if (!result)
2532 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002533
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002534 for (index = 1; index < self->groups; index++) {
2535 PyObject* item;
2536 item = match_getslice_by_index(self, index, def);
2537 if (!item) {
2538 Py_DECREF(result);
2539 return NULL;
2540 }
2541 PyTuple_SET_ITEM(result, index-1, item);
2542 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002543
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002544 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002545}
2546
2547static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002548match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002549{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002550 PyObject* result;
2551 PyObject* keys;
2552 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002553
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002554 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002555 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002556 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002557 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002558
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002559 result = PyDict_New();
2560 if (!result || !self->pattern->groupindex)
2561 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002562
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002563 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002564 if (!keys)
2565 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002566
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002567 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002568 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002569 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002570 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002571 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002572 if (!key)
2573 goto failed;
2574 value = match_getslice(self, key, def);
2575 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002576 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002577 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002578 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002579 status = PyDict_SetItem(result, key, value);
2580 Py_DECREF(value);
2581 if (status < 0)
2582 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002583 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002585 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002586
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002587 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002588
2589failed:
2590 Py_DECREF(keys);
2591 Py_DECREF(result);
2592 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002593}
2594
2595static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002596match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002597{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002598 int index;
2599
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002600 PyObject* index_ = Py_False; /* zero */
2601 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2602 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002603
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002604 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002606 if (index < 0 || index >= self->groups) {
2607 PyErr_SetString(
2608 PyExc_IndexError,
2609 "no such group"
2610 );
2611 return NULL;
2612 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002613
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002614 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002615 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002616}
2617
2618static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002619match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002620{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002621 int index;
2622
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002623 PyObject* index_ = Py_False; /* zero */
2624 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2625 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002626
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002627 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002628
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002629 if (index < 0 || index >= self->groups) {
2630 PyErr_SetString(
2631 PyExc_IndexError,
2632 "no such group"
2633 );
2634 return NULL;
2635 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002636
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002637 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002638 return Py_BuildValue("i", self->mark[index*2+1]);
2639}
2640
2641LOCAL(PyObject*)
2642_pair(int i1, int i2)
2643{
2644 PyObject* pair;
2645 PyObject* item;
2646
2647 pair = PyTuple_New(2);
2648 if (!pair)
2649 return NULL;
2650
2651 item = PyInt_FromLong(i1);
2652 if (!item)
2653 goto error;
2654 PyTuple_SET_ITEM(pair, 0, item);
2655
2656 item = PyInt_FromLong(i2);
2657 if (!item)
2658 goto error;
2659 PyTuple_SET_ITEM(pair, 1, item);
2660
2661 return pair;
2662
2663 error:
2664 Py_DECREF(pair);
2665 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002666}
2667
2668static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002669match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002670{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002671 int index;
2672
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002673 PyObject* index_ = Py_False; /* zero */
2674 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2675 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002676
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002677 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002678
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002679 if (index < 0 || index >= self->groups) {
2680 PyErr_SetString(
2681 PyExc_IndexError,
2682 "no such group"
2683 );
2684 return NULL;
2685 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002686
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002687 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002688 return _pair(self->mark[index*2], self->mark[index*2+1]);
2689}
2690
2691static PyObject*
2692match_regs(MatchObject* self)
2693{
2694 PyObject* regs;
2695 PyObject* item;
2696 int index;
2697
2698 regs = PyTuple_New(self->groups);
2699 if (!regs)
2700 return NULL;
2701
2702 for (index = 0; index < self->groups; index++) {
2703 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2704 if (!item) {
2705 Py_DECREF(regs);
2706 return NULL;
2707 }
2708 PyTuple_SET_ITEM(regs, index, item);
2709 }
2710
2711 Py_INCREF(regs);
2712 self->regs = regs;
2713
2714 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002715}
2716
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002717static PyObject*
2718match_copy(MatchObject* self, PyObject* args)
2719{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002720#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002721 MatchObject* copy;
2722 int slots, offset;
2723
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002724 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2725 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002726
2727 slots = 2 * (self->pattern->groups+1);
2728
2729 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2730 if (!copy)
2731 return NULL;
2732
2733 /* this value a constant, but any compiler should be able to
2734 figure that out all by itself */
2735 offset = offsetof(MatchObject, string);
2736
2737 Py_XINCREF(self->pattern);
2738 Py_XINCREF(self->string);
2739 Py_XINCREF(self->regs);
2740
2741 memcpy((char*) copy + offset, (char*) self + offset,
2742 sizeof(MatchObject) + slots * sizeof(int) - offset);
2743
2744 return (PyObject*) copy;
2745#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002746 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002747 return NULL;
2748#endif
2749}
2750
2751static PyObject*
2752match_deepcopy(MatchObject* self, PyObject* args)
2753{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002754#ifdef USE_BUILTIN_COPY
2755 MatchObject* copy;
2756
2757 PyObject* memo;
2758 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2759 return NULL;
2760
2761 copy = (MatchObject*) match_copy(self, Py_None);
2762 if (!copy)
2763 return NULL;
2764
2765 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2766 !deepcopy(&copy->string, memo) ||
2767 !deepcopy(&copy->regs, memo)) {
2768 Py_DECREF(copy);
2769 return NULL;
2770 }
2771
2772#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002773 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2774 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002775#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002776}
2777
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002778static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002779 {"group", (PyCFunction) match_group, METH_VARARGS},
2780 {"start", (PyCFunction) match_start, METH_VARARGS},
2781 {"end", (PyCFunction) match_end, METH_VARARGS},
2782 {"span", (PyCFunction) match_span, METH_VARARGS},
2783 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2784 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2785 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002786 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2787 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002788 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002789};
2790
2791static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002792match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002793{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002794 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002795
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002796 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2797 if (res)
2798 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002800 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002801
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002802 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002803 if (self->lastindex >= 0)
2804 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002805 Py_INCREF(Py_None);
2806 return Py_None;
2807 }
2808
2809 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002810 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002811 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002812 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002813 );
2814 if (result)
2815 return result;
2816 PyErr_Clear();
2817 }
2818 Py_INCREF(Py_None);
2819 return Py_None;
2820 }
2821
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002822 if (!strcmp(name, "string")) {
2823 if (self->string) {
2824 Py_INCREF(self->string);
2825 return self->string;
2826 } else {
2827 Py_INCREF(Py_None);
2828 return Py_None;
2829 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002830 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002831
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002832 if (!strcmp(name, "regs")) {
2833 if (self->regs) {
2834 Py_INCREF(self->regs);
2835 return self->regs;
2836 } else
2837 return match_regs(self);
2838 }
2839
2840 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002841 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002842 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002843 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002845 if (!strcmp(name, "pos"))
2846 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002847
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002848 if (!strcmp(name, "endpos"))
2849 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002850
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002851 PyErr_SetString(PyExc_AttributeError, name);
2852 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002853}
2854
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2857
2858statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002859 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002860 0, "_" SRE_MODULE ".SRE_Match",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002861 sizeof(MatchObject), sizeof(int),
2862 (destructor)match_dealloc, /*tp_dealloc*/
2863 0, /*tp_print*/
2864 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002865};
2866
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002867/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002868/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002869
2870static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002871scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002872{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002873 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002874 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002875 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002876}
2877
2878static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002879scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002880{
2881 SRE_STATE* state = &self->state;
2882 PyObject* match;
2883 int status;
2884
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002885 state_reset(state);
2886
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002887 state->ptr = state->start;
2888
2889 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002890 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002891 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002892#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002893 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002894#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002895 }
2896
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002897 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002898 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002899
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00002900 if ((status == 0 || state->ptr == state->start) &&
2901 state->ptr < state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002902 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002903 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002904 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002905
2906 return match;
2907}
2908
2909
2910static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002911scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002912{
2913 SRE_STATE* state = &self->state;
2914 PyObject* match;
2915 int status;
2916
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002917 state_reset(state);
2918
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002919 state->ptr = state->start;
2920
2921 if (state->charsize == 1) {
2922 status = sre_search(state, PatternObject_GetCode(self->pattern));
2923 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002924#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002925 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002926#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002927 }
2928
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002929 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002930 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002931
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00002932 if ((status == 0 || state->ptr == state->start) &&
2933 state->ptr < state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002934 state->start = (void*) ((char*) state->ptr + state->charsize);
2935 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002936 state->start = state->ptr;
2937
2938 return match;
2939}
2940
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002941static PyMethodDef scanner_methods[] = {
Neal Norwitzbb2769f2002-03-31 15:46:00 +00002942 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
2943 /* METH_OLDARGS is not in Python 1.5.2 */
2944 {"match", (PyCFunction) scanner_match, 0},
2945 {"search", (PyCFunction) scanner_search, 0},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002946 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002947};
2948
2949static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002950scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002951{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002952 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002953
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002954 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2955 if (res)
2956 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002957
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002958 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002959
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002960 /* attributes */
2961 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002962 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002963 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002964 }
2965
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002966 PyErr_SetString(PyExc_AttributeError, name);
2967 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002968}
2969
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002970statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002971 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002972 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002973 sizeof(ScannerObject), 0,
2974 (destructor)scanner_dealloc, /*tp_dealloc*/
2975 0, /*tp_print*/
2976 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002977};
2978
Guido van Rossumb700df92000-03-31 14:59:30 +00002979static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002980 {"compile", _compile, METH_VARARGS},
2981 {"getcodesize", sre_codesize, METH_VARARGS},
2982 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002983 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002984};
2985
Mark Hammond8235ea12002-07-19 06:55:41 +00002986PyMODINIT_FUNC init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002987{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002988 PyObject* m;
2989 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002990 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002991
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002992 /* Patch object types */
2993 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002994 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002995
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002996 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002997 d = PyModule_GetDict(m);
2998
Fredrik Lundh21009b92001-09-18 18:47:09 +00002999 x = PyInt_FromLong(SRE_MAGIC);
3000 if (x) {
3001 PyDict_SetItemString(d, "MAGIC", x);
3002 Py_DECREF(x);
3003 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003004
Fredrik Lundh21009b92001-09-18 18:47:09 +00003005 x = PyString_FromString(copyright);
3006 if (x) {
3007 PyDict_SetItemString(d, "copyright", x);
3008 Py_DECREF(x);
3009 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003010}
3011
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003012#endif /* !defined(SRE_RECURSIVE) */