blob: 4440a6e187f5dec131fb4edd5ce271a0db2be4be [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-06-30 fl added fast search optimization
 * 2000-06-30 fl added assert (lookahead) primitives, etc
 * 2000-07-02 fl added charset optimizations, etc
 * 2000-07-03 fl store code in pattern object, lookbehind, etc
 * 2000-07-08 fl added regs attribute
 * 2000-07-21 fl reset lastindex in scanner methods
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-03 fl added recursion limit
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-08-08 fl changed findall to return empty strings instead of None
 * 2000-08-27 fl properly propagate memory errors
 * 2000-09-02 fl return -1 instead of None for start/end/span
 * 2000-09-20 fl added expand method
 * 2000-09-21 fl don't use the buffer interface for unicode strings
 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl really fixed assert_not; reset groups in findall
 * 2000-12-21 fl fixed memory leak in groupdict
 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
 * 2001-01-16 fl fixed memory leak in pattern destructor
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-22 fl check for literal sub/subn templates
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
51
52#ifndef SRE_RECURSIVE
53
/* version and copyright banner for this build of the engine */
static char copyright[] = " SRE 2.2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000056
57#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000058#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000059
60#include "sre.h"
61
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000062#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000063
/* module name without its leading underscore; a definition made
   before this point takes precedence */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* tracing is compiled in only when VERBOSE is defined; it is switched
   off here */
#undef VERBOSE

/* unicode support: on for 1.6 and later, except that from 2.2 onwards
   Py_USING_UNICODE must be defined as well */
#if PY_VERSION_HEX >= 0x01060000 && \
    (PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE))
#define HAVE_UNICODE
#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000078
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000079/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000080/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000081
/* guard against run-away recursion (bad patterns on long strings).
   the fixed depth limit below is only defined when USE_STACKCHECK
   is not */

#ifndef USE_STACKCHECK
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* a number of 64-bit platforms need a smaller recursion limit:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* fast searching is enabled */
#define USE_FAST_SEARCH

/* aggressive inlining is disabled here (always on for Visual C) */
#undef USE_INLINE

/* copy/deepcopy handling is disabled (work in progress) */
#undef USE_BUILTIN_COPY

/* before 1.6, PyObject_DEL is mapped onto PyMem_DEL */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif
107
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000108/* -------------------------------------------------------------------- */
109
/* LOCAL(type) introduces a file-private function returning `type`;
   the exact storage/inline/calling-convention keywords depend on the
   compiler */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */

/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise; note the doubled parentheses */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
132
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000133/* -------------------------------------------------------------------- */
134/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000135
/* default character predicates (run sre_chars.py to regenerate tables) */

/* flag bits stored per character code in sre_char_info */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII. create tables in init_sre() instead */

/* classification flags for character codes 0..127, sixteen codes per
   row.  the table is only ever read, so it is declared const */
static const char sre_char_info[128] = {
    /* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /* 0x40 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /* 0x60 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};
153
/* lowercase mapping for character codes 0..127, sixteen codes per row:
   codes 65..90 ('A'..'Z') map to 97..122, every other code maps to
   itself.  the table is only ever read, so it is declared const */
static const char sre_char_lower[128] = {
    /* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
                 8,   9,  10,  11,  12,  13,  14,  15,
    /* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
                24,  25,  26,  27,  28,  29,  30,  31,
    /* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
                40,  41,  42,  43,  44,  45,  46,  47,
    /* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
                56,  57,  58,  59,  60,  61,  62,  63,
    /* 0x40 */  64,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122,  91,  92,  93,  94,  95,
    /* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122, 123, 124, 125, 126, 127
};
163
/* table-driven predicates: each yields the selected flag bit from
   sre_char_info for codes below 128, and 0 for all other codes.
   note that the argument is evaluated more than once */
#define SRE_CHAR_HAS(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

#define SRE_IS_DIGIT(ch) SRE_CHAR_HAS(ch, SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch) SRE_CHAR_HAS(ch, SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_HAS(ch, SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch) SRE_CHAR_HAS(ch, SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch) SRE_CHAR_HAS(ch, SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000174
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000175static unsigned int sre_lower(unsigned int ch)
176{
177 return ((ch) < 128 ? sre_char_lower[ch] : ch);
178}
179
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000180/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
/* <ctype.h>-based predicates: the ctype function is consulted only for
   codes below 256, any other code yields 0.  the argument is evaluated
   more than once */
#define SRE_LOC_CTYPE(pred, ch) ((ch) < 256 ? pred((ch)) : 0)

#define SRE_LOC_IS_DIGIT(ch) SRE_LOC_CTYPE(isdigit, ch)
#define SRE_LOC_IS_SPACE(ch) SRE_LOC_CTYPE(isspace, ch)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) SRE_LOC_CTYPE(isalnum, ch)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
187
/* lowercase a character code with tolower(); codes of 256 and above
   are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return tolower(ch);
    return ch;
}
192
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000193/* unicode-specific character predicates */
194
#ifdef HAVE_UNICODE

/* unicode predicates: thin wrappers that cast the code to Py_UNICODE
   and defer to the corresponding Py_UNICODE_* test */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
/* a word character is anything alphanumeric, or an underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* lowercase a character code with Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    Py_UNICODE uch = (Py_UNICODE)(ch);

    return (unsigned int) Py_UNICODE_TOLOWER(uch);
}

#endif
209
Guido van Rossumb700df92000-03-31 14:59:30 +0000210LOCAL(int)
211sre_category(SRE_CODE category, unsigned int ch)
212{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000213 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000214
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000215 case SRE_CATEGORY_DIGIT:
216 return SRE_IS_DIGIT(ch);
217 case SRE_CATEGORY_NOT_DIGIT:
218 return !SRE_IS_DIGIT(ch);
219 case SRE_CATEGORY_SPACE:
220 return SRE_IS_SPACE(ch);
221 case SRE_CATEGORY_NOT_SPACE:
222 return !SRE_IS_SPACE(ch);
223 case SRE_CATEGORY_WORD:
224 return SRE_IS_WORD(ch);
225 case SRE_CATEGORY_NOT_WORD:
226 return !SRE_IS_WORD(ch);
227 case SRE_CATEGORY_LINEBREAK:
228 return SRE_IS_LINEBREAK(ch);
229 case SRE_CATEGORY_NOT_LINEBREAK:
230 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 case SRE_CATEGORY_LOC_WORD:
233 return SRE_LOC_IS_WORD(ch);
234 case SRE_CATEGORY_LOC_NOT_WORD:
235 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000236
237#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000238 case SRE_CATEGORY_UNI_DIGIT:
239 return SRE_UNI_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_NOT_DIGIT:
241 return !SRE_UNI_IS_DIGIT(ch);
242 case SRE_CATEGORY_UNI_SPACE:
243 return SRE_UNI_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_NOT_SPACE:
245 return !SRE_UNI_IS_SPACE(ch);
246 case SRE_CATEGORY_UNI_WORD:
247 return SRE_UNI_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_NOT_WORD:
249 return !SRE_UNI_IS_WORD(ch);
250 case SRE_CATEGORY_UNI_LINEBREAK:
251 return SRE_UNI_IS_LINEBREAK(ch);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
253 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000254#else
255 case SRE_CATEGORY_UNI_DIGIT:
256 return SRE_IS_DIGIT(ch);
257 case SRE_CATEGORY_UNI_NOT_DIGIT:
258 return !SRE_IS_DIGIT(ch);
259 case SRE_CATEGORY_UNI_SPACE:
260 return SRE_IS_SPACE(ch);
261 case SRE_CATEGORY_UNI_NOT_SPACE:
262 return !SRE_IS_SPACE(ch);
263 case SRE_CATEGORY_UNI_WORD:
264 return SRE_LOC_IS_WORD(ch);
265 case SRE_CATEGORY_UNI_NOT_WORD:
266 return !SRE_LOC_IS_WORD(ch);
267 case SRE_CATEGORY_UNI_LINEBREAK:
268 return SRE_IS_LINEBREAK(ch);
269 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
270 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000271#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000272 }
273 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000274}
275
276/* helpers */
277
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000279mark_fini(SRE_STATE* state)
280{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000281 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000282 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000283 state->mark_stack = NULL;
284 }
285 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000286}
287
288static int
289mark_save(SRE_STATE* state, int lo, int hi)
290{
291 void* stack;
292 int size;
293 int minsize, newsize;
294
295 if (hi <= lo)
296 return 0;
297
298 size = (hi - lo) + 1;
299
300 newsize = state->mark_stack_size;
301 minsize = state->mark_stack_base + size;
302
303 if (newsize < minsize) {
304 /* create new stack */
305 if (!newsize) {
306 newsize = 512;
307 if (newsize < minsize)
308 newsize = minsize;
309 TRACE(("allocate stack %d\n", newsize));
310 stack = malloc(sizeof(void*) * newsize);
311 } else {
312 /* grow the stack */
313 while (newsize < minsize)
314 newsize += newsize;
315 TRACE(("grow stack to %d\n", newsize));
316 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
317 }
318 if (!stack) {
319 mark_fini(state);
320 return SRE_ERROR_MEMORY;
321 }
322 state->mark_stack = stack;
323 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000324 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000325
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000326 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000327
328 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
329 size * sizeof(void*));
330
331 state->mark_stack_base += size;
332
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000333 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000334}
335
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000336static int
337mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000338{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000339 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000341 if (hi <= lo)
342 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000344 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000345
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000346 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000347
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000348 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000349
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000350 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
351 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000354}
355
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000356void lastmark_restore(SRE_STATE *state, int lastmark)
357{
358 if (state->lastmark > lastmark) {
359 memset(
360 state->mark + lastmark + 1, 0,
361 (state->lastmark - lastmark) * sizeof(void*)
362 );
363 state->lastmark = lastmark;
364 state->lastindex = (lastmark == 0) ? -1 : (lastmark-1)/2+1;
365 }
366}
367
/* names used by the engine section when it is compiled for 8-bit
   strings */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template

#if defined(HAVE_UNICODE)

/* compile the 8-bit engine right here by including this file again;
   while SRE_RECURSIVE is defined, the part of the file guarded by
   #ifndef SRE_RECURSIVE is skipped */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

/* forget the 8-bit names */
#undef SRE_CHAR
#undef SRE_AT
#undef SRE_COUNT
#undef SRE_CHARSET
#undef SRE_INFO
#undef SRE_MATCH
#undef SRE_SEARCH
#undef SRE_LITERAL_TEMPLATE

/* names used by the engine section when it is compiled for Py_UNICODE
   strings */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000405
406#endif /* SRE_RECURSIVE */
407
408/* -------------------------------------------------------------------- */
409/* String matching engine */
410
411/* the following section is compiled twice, with different character
412 settings */
413
414LOCAL(int)
415SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
416{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000417 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000418
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000419 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000420
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000421 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000423 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000424 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000425 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000427 case SRE_AT_BEGINNING_LINE:
428 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000429 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000430
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000431 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000432 return (((void*) (ptr+1) == state->end &&
433 SRE_IS_LINEBREAK((int) ptr[0])) ||
434 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000435
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000436 case SRE_AT_END_LINE:
437 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000438 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000439
Fredrik Lundh770617b2001-01-14 15:06:11 +0000440 case SRE_AT_END_STRING:
441 return ((void*) ptr == state->end);
442
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000443 case SRE_AT_BOUNDARY:
444 if (state->beginning == state->end)
445 return 0;
446 that = ((void*) ptr > state->beginning) ?
447 SRE_IS_WORD((int) ptr[-1]) : 0;
448 this = ((void*) ptr < state->end) ?
449 SRE_IS_WORD((int) ptr[0]) : 0;
450 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000451
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000452 case SRE_AT_NON_BOUNDARY:
453 if (state->beginning == state->end)
454 return 0;
455 that = ((void*) ptr > state->beginning) ?
456 SRE_IS_WORD((int) ptr[-1]) : 0;
457 this = ((void*) ptr < state->end) ?
458 SRE_IS_WORD((int) ptr[0]) : 0;
459 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000460
461 case SRE_AT_LOC_BOUNDARY:
462 if (state->beginning == state->end)
463 return 0;
464 that = ((void*) ptr > state->beginning) ?
465 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
466 this = ((void*) ptr < state->end) ?
467 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
468 return this != that;
469
470 case SRE_AT_LOC_NON_BOUNDARY:
471 if (state->beginning == state->end)
472 return 0;
473 that = ((void*) ptr > state->beginning) ?
474 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
475 this = ((void*) ptr < state->end) ?
476 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
477 return this == that;
478
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000479#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000480 case SRE_AT_UNI_BOUNDARY:
481 if (state->beginning == state->end)
482 return 0;
483 that = ((void*) ptr > state->beginning) ?
484 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
485 this = ((void*) ptr < state->end) ?
486 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
487 return this != that;
488
489 case SRE_AT_UNI_NON_BOUNDARY:
490 if (state->beginning == state->end)
491 return 0;
492 that = ((void*) ptr > state->beginning) ?
493 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
494 this = ((void*) ptr < state->end) ?
495 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
496 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000497#endif
498
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000499 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000500
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000501 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000502}
503
504LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000505SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000506{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000507 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000508
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000509 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000510
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000511 for (;;) {
512 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000513
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000514 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000515 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000516 if (ch == set[0])
517 return ok;
518 set++;
519 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000520
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000521 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000522 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000523 if (set[0] <= ch && ch <= set[1])
524 return ok;
525 set += 2;
526 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000527
Fredrik Lundh3562f112000-07-02 12:00:07 +0000528 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000529 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000530 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
531 return ok;
532 set += 16;
533 break;
534
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000535 case SRE_OP_BIGCHARSET:
536 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
537 {
538 int count, block;
539 count = *(set++);
540 block = ((unsigned char*)set)[ch >> 8];
541 set += 128;
542 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
543 return ok;
544 set += count*16;
545 break;
546 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000547
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000548 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000549 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000550 if (sre_category(set[0], (int) ch))
551 return ok;
552 set += 1;
553 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000554
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000555 case SRE_OP_NEGATE:
556 ok = !ok;
557 break;
558
559 case SRE_OP_FAILURE:
560 return !ok;
561
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000562 default:
563 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000564 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000565 return 0;
566 }
567 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000568}
569
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000570LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
571
572LOCAL(int)
573SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
574{
575 SRE_CODE chr;
576 SRE_CHAR* ptr = state->ptr;
577 SRE_CHAR* end = state->end;
578 int i;
579
580 /* adjust end */
581 if (maxcount < end - ptr && maxcount != 65535)
582 end = ptr + maxcount;
583
584 switch (pattern[0]) {
585
586 case SRE_OP_ANY:
587 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000588 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000589 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
590 ptr++;
591 break;
592
593 case SRE_OP_ANY_ALL:
594 /* repeated dot wildcare. skip to the end of the target
595 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000596 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000597 ptr = end;
598 break;
599
600 case SRE_OP_LITERAL:
601 /* repeated literal */
602 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000603 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000604 while (ptr < end && (SRE_CODE) *ptr == chr)
605 ptr++;
606 break;
607
608 case SRE_OP_LITERAL_IGNORE:
609 /* repeated literal */
610 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000612 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
613 ptr++;
614 break;
615
616 case SRE_OP_NOT_LITERAL:
617 /* repeated non-literal */
618 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000619 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000620 while (ptr < end && (SRE_CODE) *ptr != chr)
621 ptr++;
622 break;
623
624 case SRE_OP_NOT_LITERAL_IGNORE:
625 /* repeated non-literal */
626 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000627 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000628 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
629 ptr++;
630 break;
631
632 case SRE_OP_IN:
633 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000634 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
635 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000636 ptr++;
637 break;
638
639 default:
640 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000641 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000642 while ((SRE_CHAR*) state->ptr < end) {
643 i = SRE_MATCH(state, pattern, level);
644 if (i < 0)
645 return i;
646 if (!i)
647 break;
648 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000649 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
650 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000651 return (SRE_CHAR*) state->ptr - ptr;
652 }
653
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000654 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000655 return ptr - (SRE_CHAR*) state->ptr;
656}
657
Fredrik Lundh33accc12000-08-27 20:59:47 +0000658#if 0 /* not used in this release */
Guido van Rossumb700df92000-03-31 14:59:30 +0000659LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000660SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
661{
662 /* check if an SRE_OP_INFO block matches at the current position.
663 returns the number of SRE_CODE objects to skip if successful, 0
664 if no match */
665
666 SRE_CHAR* end = state->end;
667 SRE_CHAR* ptr = state->ptr;
668 int i;
669
670 /* check minimal length */
671 if (pattern[3] && (end - ptr) < pattern[3])
672 return 0;
673
674 /* check known prefix */
675 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
676 /* <length> <skip> <prefix data> <overlap data> */
677 for (i = 0; i < pattern[5]; i++)
678 if ((SRE_CODE) ptr[i] != pattern[7 + i])
679 return 0;
680 return pattern[0] + 2 * pattern[6];
681 }
682 return pattern[0];
683}
Fredrik Lundh33accc12000-08-27 20:59:47 +0000684#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685
686LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000687SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000688{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000689 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000690 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000691
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000692 SRE_CHAR* end = state->end;
693 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000694 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000695 SRE_REPEAT* rp;
696 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000697 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000698
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000699 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000700
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000701 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000702
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000703#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000704 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000705 return SRE_ERROR_RECURSION_LIMIT;
706#endif
707
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000708#if defined(USE_RECURSION_LIMIT)
709 if (level > USE_RECURSION_LIMIT)
710 return SRE_ERROR_RECURSION_LIMIT;
711#endif
712
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000713 if (pattern[0] == SRE_OP_INFO) {
714 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000715 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000716 if (pattern[3] && (end - ptr) < pattern[3]) {
717 TRACE(("reject (got %d chars, need %d)\n",
718 (end - ptr), pattern[3]));
719 return 0;
720 }
721 pattern += pattern[1] + 1;
722 }
723
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000725
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000726 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000727
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000728 case SRE_OP_FAILURE:
729 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000730 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000731 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000732
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000733 case SRE_OP_SUCCESS:
734 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000735 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000737 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000738
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 case SRE_OP_AT:
740 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000741 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000742 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000744 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 pattern++;
746 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000747
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 case SRE_OP_CATEGORY:
749 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000750 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000751 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000753 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000754 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000755 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000756 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000757
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000758 case SRE_OP_LITERAL:
759 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000760 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000761 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000763 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000764 pattern++;
765 ptr++;
766 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000767
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 case SRE_OP_NOT_LITERAL:
769 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000770 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000771 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000772 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000773 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000774 pattern++;
775 ptr++;
776 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000777
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000778 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000779 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000780 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000781 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000782 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
783 return 0;
784 ptr++;
785 break;
786
787 case SRE_OP_ANY_ALL:
788 /* match anything */
789 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000790 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000791 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000792 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 ptr++;
794 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000795
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000796 case SRE_OP_IN:
797 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000798 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000799 TRACE(("|%p|%p|IN\n", pattern, ptr));
800 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000801 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000802 pattern += pattern[0];
803 ptr++;
804 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000805
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000806 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000807 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000808 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000809 i = pattern[0];
810 {
811 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
812 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
813 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000814 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815 while (p < e) {
816 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000817 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000818 p++; ptr++;
819 }
820 }
821 pattern++;
822 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000823
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000824 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000825 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000826 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000827 i = pattern[0];
828 {
829 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
830 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
831 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000832 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000833 while (p < e) {
834 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000835 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000836 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000837 p++; ptr++;
838 }
839 }
840 pattern++;
841 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000842
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000843 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000844 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000845 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000846 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000847 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000848 pattern++;
849 ptr++;
850 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000851
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000852 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000853 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000854 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000855 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000856 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000857 pattern++;
858 ptr++;
859 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000860
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000861 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000862 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000863 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000864 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000865 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000866 pattern += pattern[0];
867 ptr++;
868 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000869
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000870 case SRE_OP_MARK:
871 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000872 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000873 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000874 i = pattern[0];
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000875 if (i > state->lastmark) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000876 state->lastmark = i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000877 if (i & 1)
878 state->lastindex = i/2 + 1;
879 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000880 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000881 pattern++;
882 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000883
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000884 case SRE_OP_JUMP:
885 case SRE_OP_INFO:
886 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000887 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000888 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000889 pattern += pattern[0];
890 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000891
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000892 case SRE_OP_ASSERT:
893 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000894 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000895 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000896 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000897 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000898 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000899 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000900 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000901 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000902 pattern += pattern[0];
903 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000904
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000905 case SRE_OP_ASSERT_NOT:
906 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000907 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000908 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000909 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000910 if (state->ptr >= state->beginning) {
911 i = SRE_MATCH(state, pattern + 2, level + 1);
912 if (i < 0)
913 return i;
914 if (i)
915 return 0;
916 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000917 pattern += pattern[0];
918 break;
919
920 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000921 /* alternation */
922 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000923 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000924 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000925 for (; pattern[0]; pattern += pattern[0]) {
926 if (pattern[1] == SRE_OP_LITERAL &&
927 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
928 continue;
929 if (pattern[1] == SRE_OP_IN &&
930 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
931 continue;
932 state->ptr = ptr;
933 i = SRE_MATCH(state, pattern + 1, level + 1);
934 if (i)
935 return i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000936 lastmark_restore(state, lastmark);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000937 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000938 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000939
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000940 case SRE_OP_REPEAT_ONE:
941 /* match repeated sequence (maximizing regexp) */
942
943 /* this operator only works if the repeated item is
944 exactly one character wide, and we're not already
945 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000946 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000947
948 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
949
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000950 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000951 pattern[1], pattern[2]));
952
Fredrik Lundhe1869832000-08-01 22:47:49 +0000953 if (ptr + pattern[1] > end)
954 return 0; /* cannot match */
955
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000956 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000957
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000958 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
959 if (count < 0)
960 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000961
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000962 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000963
964 /* when we arrive here, count contains the number of
965 matches, and ptr points to the tail of the target
966 string. check if the rest of the pattern matches,
967 and backtrack if not. */
968
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000969 if (count < (int) pattern[1])
970 return 0;
971
972 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
973 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000974 state->ptr = ptr;
975 return 1;
976
977 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
978 /* tail starts with a literal. skip positions where
979 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000980 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000981 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000982 while (count >= (int) pattern[1] &&
983 (ptr >= end || *ptr != chr)) {
984 ptr--;
985 count--;
986 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000987 if (count < (int) pattern[1])
988 break;
989 state->ptr = ptr;
990 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000991 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000992 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000993 ptr--;
994 count--;
995 }
996
997 } else {
998 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000999 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001000 while (count >= (int) pattern[1]) {
1001 state->ptr = ptr;
1002 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001003 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +00001004 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001005 ptr--;
1006 count--;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +00001007 lastmark_restore(state, lastmark);
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001008 }
1009 }
1010 return 0;
1011
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001012 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001013 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001014 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001015 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001016 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001017 pattern[1], pattern[2]));
1018
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001019 rep.count = -1;
1020 rep.pattern = pattern;
1021
1022 /* install new repeat context */
1023 rep.prev = state->repeat;
1024 state->repeat = &rep;
1025
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001026 state->ptr = ptr;
1027 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001028
1029 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001030
1031 return i;
1032
1033 case SRE_OP_MAX_UNTIL:
1034 /* maximizing repeat */
1035 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1036
1037 /* FIXME: we probably need to deal with zero-width
1038 matches in here... */
1039
1040 rp = state->repeat;
1041 if (!rp)
1042 return SRE_ERROR_STATE;
1043
1044 state->ptr = ptr;
1045
1046 count = rp->count + 1;
1047
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001048 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001049
1050 if (count < rp->pattern[1]) {
1051 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001052 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001053 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001054 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001055 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001056 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001057 rp->count = count - 1;
1058 state->ptr = ptr;
1059 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001060 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001061
1062 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001063 /* we may have enough matches, but if we can
1064 match another item, do so */
1065 rp->count = count;
1066 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001067 i = mark_save(state, 0, lastmark);
1068 if (i < 0)
1069 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001070 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001071 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001072 if (i)
1073 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001074 i = mark_restore(state, 0, lastmark);
1075 if (i < 0)
1076 return i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +00001077 lastmark_restore(state, lastmark);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001078 rp->count = count - 1;
1079 state->ptr = ptr;
1080 }
1081
1082 /* cannot match more repeated items here. make sure the
1083 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001084 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001085 i = SRE_MATCH(state, pattern, level + 1);
1086 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001087 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001088 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001089 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001090 return 0;
1091
1092 case SRE_OP_MIN_UNTIL:
1093 /* minimizing repeat */
1094 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1095
1096 rp = state->repeat;
1097 if (!rp)
1098 return SRE_ERROR_STATE;
1099
1100 count = rp->count + 1;
1101
Fredrik Lundh770617b2001-01-14 15:06:11 +00001102 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1103 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001104
1105 state->ptr = ptr;
1106
1107 if (count < rp->pattern[1]) {
1108 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001109 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001110 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001111 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001112 if (i)
1113 return i;
1114 rp->count = count-1;
1115 state->ptr = ptr;
1116 return 0;
1117 }
1118
1119 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001120 state->repeat = rp->prev;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001121 i = SRE_MATCH(state, pattern, level + 1);
1122 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001123 return i;
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001124
Fredrik Lundh770617b2001-01-14 15:06:11 +00001125 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001126 state->repeat = rp;
1127
1128 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1129 return 0;
1130
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001131 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001132 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001133 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001134 if (i)
1135 return i;
1136 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001137 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001138 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001139
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001140 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001141 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001142 return SRE_ERROR_ILLEGAL;
1143 }
1144 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001145
Sjoerd Mullender89dfe9e2001-08-30 14:37:07 +00001146 /* can't end up here */
Fredrik Lundh21009b92001-09-18 18:47:09 +00001147 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
Guido van Rossumb700df92000-03-31 14:59:30 +00001148}
1149
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001150LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001151SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1152{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001153 SRE_CHAR* ptr = state->start;
1154 SRE_CHAR* end = state->end;
1155 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001156 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001157 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001158 SRE_CODE* prefix = NULL;
1159 SRE_CODE* charset = NULL;
1160 SRE_CODE* overlap = NULL;
1161 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001162
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001163 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001164 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001165 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001166
1167 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001168
1169 if (pattern[3] > 0) {
1170 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001171 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001172 end -= pattern[3]-1;
1173 if (end <= ptr)
1174 end = ptr+1;
1175 }
1176
Fredrik Lundh3562f112000-07-02 12:00:07 +00001177 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001178 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001179 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001180 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001181 prefix_skip = pattern[6];
1182 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001183 overlap = prefix + prefix_len - 1;
1184 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001185 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001186 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001187 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001188
1189 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001190 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001191
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001192 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1193 TRACE(("charset = %p\n", charset));
1194
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001195#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001196 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001197 /* pattern starts with a known prefix. use the overlap
1198 table to skip forward as fast as we possibly can */
1199 int i = 0;
1200 end = state->end;
1201 while (ptr < end) {
1202 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001203 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001204 if (!i)
1205 break;
1206 else
1207 i = overlap[i];
1208 } else {
1209 if (++i == prefix_len) {
1210 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001211 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1212 state->start = ptr + 1 - prefix_len;
1213 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001214 if (flags & SRE_INFO_LITERAL)
1215 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001216 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001217 if (status != 0)
1218 return status;
1219 /* close but no cigar -- try again */
1220 i = overlap[i];
1221 }
1222 break;
1223 }
1224
1225 }
1226 ptr++;
1227 }
1228 return 0;
1229 }
1230#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001231
Fredrik Lundh3562f112000-07-02 12:00:07 +00001232 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001233 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001234 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001235 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001236 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001237 for (;;) {
1238 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1239 ptr++;
1240 if (ptr == end)
1241 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001242 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001243 state->start = ptr;
1244 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001245 if (flags & SRE_INFO_LITERAL)
1246 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001247 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001248 if (status != 0)
1249 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001250 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001251 } else if (charset) {
1252 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001253 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001254 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001255 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001256 ptr++;
1257 if (ptr == end)
1258 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001259 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001260 state->start = ptr;
1261 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001262 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001263 if (status != 0)
1264 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001265 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001266 }
1267 } else
1268 /* general case */
1269 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001270 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001271 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001272 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001273 if (status != 0)
1274 break;
1275 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001276
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001277 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001278}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001279
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001280LOCAL(int)
1281SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1282{
1283 /* check if given string is a literal template (i.e. no escapes) */
1284 while (len-- > 0)
1285 if (*ptr++ == '\\')
1286 return 0;
1287 return 1;
1288}
Guido van Rossumb700df92000-03-31 14:59:30 +00001289
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001290#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001291
1292/* -------------------------------------------------------------------- */
1293/* factories and destructors */
1294
1295/* see sre.h for object declarations */
1296
/* file-local type objects, declared ahead of use so that code below
   (e.g. _compile, which passes &Pattern_Type to the allocator) can
   refer to them; presumably defined with initializers later in the
   file -- TODO confirm */
Jeremy Hylton938ace62002-07-17 16:30:39 +00001297static PyTypeObject Pattern_Type;
1298static PyTypeObject Match_Type;
1299static PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001300
1301static PyObject *
1302_compile(PyObject* self_, PyObject* args)
1303{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001304 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001305
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001306 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001307 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001308
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001309 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001310 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001311 PyObject* code;
1312 int groups = 0;
1313 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001314 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001315 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1316 &PyList_Type, &code, &groups,
1317 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001318 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001319
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001320 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001321
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001322 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001323 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001324 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001325
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001326 self->codesize = n;
1327
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001328 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001329 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001330 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001331 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001332
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001333 if (PyErr_Occurred()) {
1334 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001335 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001336 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001337
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001338 Py_INCREF(pattern);
1339 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001340
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001341 self->flags = flags;
1342
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001343 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001344
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001345 Py_XINCREF(groupindex);
1346 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001347
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001348 Py_XINCREF(indexgroup);
1349 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001350
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001351 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001352}
1353
1354static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001355sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001356{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001358}
1359
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001360static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001361sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001362{
1363 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001364 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001365 return NULL;
1366 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001367 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001368 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001369#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001370 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001371#else
1372 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001373#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001374 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001375}
1376
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001377LOCAL(void)
1378state_reset(SRE_STATE* state)
1379{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001380 state->lastmark = 0;
1381
1382 /* FIXME: dynamic! */
Neal Norwitz35fc7602002-06-13 21:11:11 +00001383 memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001384
1385 state->lastindex = -1;
1386
1387 state->repeat = NULL;
1388
1389 mark_fini(state);
1390}
1391
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001392static void*
1393getstring(PyObject* string, int* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001394{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001395 /* given a python object, return a data pointer, a length (in
1396 characters), and a character size. return NULL if the object
1397 is not a string (or not compatible) */
1398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001399 PyBufferProcs *buffer;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001400 int size, bytes, charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001401 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001402
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001403#if defined(HAVE_UNICODE)
1404 if (PyUnicode_Check(string)) {
1405 /* unicode strings doesn't always support the buffer interface */
1406 ptr = (void*) PyUnicode_AS_DATA(string);
1407 bytes = PyUnicode_GET_DATA_SIZE(string);
1408 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001409 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001410
1411 } else {
1412#endif
1413
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001414 /* get pointer to string buffer */
1415 buffer = string->ob_type->tp_as_buffer;
1416 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1417 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001418 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001419 return NULL;
1420 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001421
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001422 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001423 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1424 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001425 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1426 return NULL;
1427 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001429 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001430#if PY_VERSION_HEX >= 0x01060000
1431 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001432#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001433 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001434#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001435
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001436 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001437 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001438#if defined(HAVE_UNICODE)
1439 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001440 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001441#endif
1442 else {
1443 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1444 return NULL;
1445 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001446
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001447#if defined(HAVE_UNICODE)
1448 }
1449#endif
1450
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001451 *p_length = size;
1452 *p_charsize = charsize;
1453
1454 return ptr;
1455}
1456
1457LOCAL(PyObject*)
1458state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1459 int start, int end)
1460{
1461 /* prepare state object */
1462
1463 int length;
1464 int charsize;
1465 void* ptr;
1466
1467 memset(state, 0, sizeof(SRE_STATE));
1468
1469 state->lastindex = -1;
1470
1471 ptr = getstring(string, &length, &charsize);
1472 if (!ptr)
1473 return NULL;
1474
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001475 /* adjust boundaries */
1476 if (start < 0)
1477 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001478 else if (start > length)
1479 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001480
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001481 if (end < 0)
1482 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001483 else if (end > length)
1484 end = length;
1485
1486 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001487
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001488 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001489
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001490 state->start = (void*) ((char*) ptr + start * state->charsize);
1491 state->end = (void*) ((char*) ptr + end * state->charsize);
1492
1493 Py_INCREF(string);
1494 state->string = string;
1495 state->pos = start;
1496 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001497
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001498 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001499 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001500 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001501#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001502 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001503#else
1504 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001505#endif
1506 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001507 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001508
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001509 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001510}
1511
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001512LOCAL(void)
1513state_fini(SRE_STATE* state)
1514{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001515 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001516 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001517}
1518
/* convert a pointer into the target string (member) to a character
   offset, counted from the beginning of the string */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1522
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001523LOCAL(PyObject*)
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001524state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001525{
Fredrik Lundh58100642000-08-09 09:14:35 +00001526 int i, j;
1527
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001528 index = (index - 1) * 2;
1529
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001530 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001531 if (empty)
1532 /* want empty string */
1533 i = j = 0;
1534 else {
1535 Py_INCREF(Py_None);
1536 return Py_None;
1537 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001538 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001539 i = STATE_OFFSET(state, state->mark[index]);
1540 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001541 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001542
Fredrik Lundh58100642000-08-09 09:14:35 +00001543 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001544}
1545
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001546static void
1547pattern_error(int status)
1548{
1549 switch (status) {
1550 case SRE_ERROR_RECURSION_LIMIT:
1551 PyErr_SetString(
1552 PyExc_RuntimeError,
1553 "maximum recursion limit exceeded"
1554 );
1555 break;
1556 case SRE_ERROR_MEMORY:
1557 PyErr_NoMemory();
1558 break;
1559 default:
1560 /* other error codes indicate compiler/engine bugs */
1561 PyErr_SetString(
1562 PyExc_RuntimeError,
1563 "internal error in regular expression engine"
1564 );
1565 }
1566}
1567
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001568static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001569pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001570{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001571 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001572
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001573 MatchObject* match;
1574 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001575 char* base;
1576 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001577
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001578 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001579
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001580 /* create match object (with room for extra group marks) */
1581 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001582 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001583 if (!match)
1584 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001585
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001586 Py_INCREF(pattern);
1587 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001588
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001589 Py_INCREF(state->string);
1590 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001591
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001592 match->regs = NULL;
1593 match->groups = pattern->groups+1;
1594
1595 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001596
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001597 base = (char*) state->beginning;
1598 n = state->charsize;
1599
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001600 match->mark[0] = ((char*) state->start - base) / n;
1601 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001602
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001603 for (i = j = 0; i < pattern->groups; i++, j+=2)
1604 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1605 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1606 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1607 } else
1608 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1609
1610 match->pos = state->pos;
1611 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001612
Fredrik Lundh6f013982000-07-03 18:44:21 +00001613 match->lastindex = state->lastindex;
1614
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001615 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001616
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001617 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001618
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001619 /* no match */
1620 Py_INCREF(Py_None);
1621 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001622
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001623 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001624
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001625 /* internal error */
1626 pattern_error(status);
1627 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001628}
1629
1630static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001631pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001632{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001633 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001634
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001635 ScannerObject* self;
1636
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001637 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001638 int start = 0;
1639 int end = INT_MAX;
1640 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1641 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001642
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001643 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001644 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001645 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001646 return NULL;
1647
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001648 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001649 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001650 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001651 return NULL;
1652 }
1653
1654 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001655 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001656
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001657 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001658}
1659
Guido van Rossumb700df92000-03-31 14:59:30 +00001660static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001661pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001662{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001663 Py_XDECREF(self->pattern);
1664 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001665 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001666 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001667}
1668
1669static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001670pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001671{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001672 SRE_STATE state;
1673 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001674
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001675 PyObject* string;
1676 int start = 0;
1677 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001678 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1679 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1680 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001681 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001682
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001683 string = state_init(&state, self, string, start, end);
1684 if (!string)
1685 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001686
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001687 state.ptr = state.start;
1688
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001689 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1690
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001691 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001692 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001694#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001695 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001696#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001697 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001698
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001699 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1700
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001701 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001702
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001703 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001704}
1705
1706static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001707pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001708{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001709 SRE_STATE state;
1710 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001711
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001712 PyObject* string;
1713 int start = 0;
1714 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001715 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1716 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1717 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001718 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001719
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001720 string = state_init(&state, self, string, start, end);
1721 if (!string)
1722 return NULL;
1723
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001724 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1725
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001726 if (state.charsize == 1) {
1727 status = sre_search(&state, PatternObject_GetCode(self));
1728 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001729#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001730 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001731#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001732 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001733
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001734 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1735
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001736 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001737
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001738 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001739}
1740
1741static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001742call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001743{
1744 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001745 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001746 PyObject* func;
1747 PyObject* result;
1748
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001749 if (!args)
1750 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001751 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001752 if (!name)
1753 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001754 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001755 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001756 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001757 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001758 func = PyObject_GetAttrString(mod, function);
1759 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001760 if (!func)
1761 return NULL;
1762 result = PyObject_CallObject(func, args);
1763 Py_DECREF(func);
1764 Py_DECREF(args);
1765 return result;
1766}
1767
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object with the result of copy.deepcopy(*object,
       memo).  returns 1 on success.  returns 0 if the call failed,
       in which case *object is left unchanged */

    PyObject* replacement = call(
        "copy", "deepcopy",
        Py_BuildValue("OO", *object, memo)
        );

    if (replacement) {
        /* drop the original, and store the copy in its place */
        Py_DECREF(*object);
        *object = replacement;
        return 1; /* success */
    }

    return 0;
}
#endif
1787
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001788static PyObject*
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00001789join_list(PyObject* list, PyObject* pattern)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001790{
1791 /* join list elements */
1792
1793 PyObject* joiner;
1794#if PY_VERSION_HEX >= 0x01060000
1795 PyObject* function;
1796 PyObject* args;
1797#endif
1798 PyObject* result;
1799
1800 switch (PyList_GET_SIZE(list)) {
1801 case 0:
1802 Py_DECREF(list);
1803 return PyString_FromString("");
1804 case 1:
1805 result = PyList_GET_ITEM(list, 0);
1806 Py_INCREF(result);
1807 Py_DECREF(list);
1808 return result;
1809 }
1810
1811 /* two or more elements: slice out a suitable separator from the
1812 first member, and use that to join the entire list */
1813
1814 joiner = PySequence_GetSlice(pattern, 0, 0);
1815 if (!joiner)
1816 return NULL;
1817
1818#if PY_VERSION_HEX >= 0x01060000
1819 function = PyObject_GetAttrString(joiner, "join");
1820 if (!function) {
1821 Py_DECREF(joiner);
1822 return NULL;
1823 }
1824 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001825 if (!args) {
1826 Py_DECREF(function);
1827 Py_DECREF(joiner);
1828 return NULL;
1829 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001830 PyTuple_SET_ITEM(args, 0, list);
1831 result = PyObject_CallObject(function, args);
1832 Py_DECREF(args); /* also removes list */
1833 Py_DECREF(function);
1834#else
1835 result = call(
1836 "string", "join",
1837 Py_BuildValue("OO", list, joiner)
1838 );
1839#endif
1840 Py_DECREF(joiner);
1841
1842 return result;
1843}
1844
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001845static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001846pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001847{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001848 SRE_STATE state;
1849 PyObject* list;
1850 int status;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001851 int i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00001852
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001853 PyObject* string;
1854 int start = 0;
1855 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001856 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1857 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1858 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001859 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001860
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001861 string = state_init(&state, self, string, start, end);
1862 if (!string)
1863 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001864
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001865 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001866 if (!list) {
1867 state_fini(&state);
1868 return NULL;
1869 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001870
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001871 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001872
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001873 PyObject* item;
1874
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001875 state_reset(&state);
1876
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001877 state.ptr = state.start;
1878
1879 if (state.charsize == 1) {
1880 status = sre_search(&state, PatternObject_GetCode(self));
1881 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001882#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001883 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001884#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001885 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001886
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001887 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001888 if (status == 0)
1889 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001890 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001891 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001892 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001893
1894 /* don't bother to build a match object */
1895 switch (self->groups) {
1896 case 0:
1897 b = STATE_OFFSET(&state, state.start);
1898 e = STATE_OFFSET(&state, state.ptr);
1899 item = PySequence_GetSlice(string, b, e);
1900 if (!item)
1901 goto error;
1902 break;
1903 case 1:
1904 item = state_getslice(&state, 1, string, 1);
1905 if (!item)
1906 goto error;
1907 break;
1908 default:
1909 item = PyTuple_New(self->groups);
1910 if (!item)
1911 goto error;
1912 for (i = 0; i < self->groups; i++) {
1913 PyObject* o = state_getslice(&state, i+1, string, 1);
1914 if (!o) {
1915 Py_DECREF(item);
1916 goto error;
1917 }
1918 PyTuple_SET_ITEM(item, i, o);
1919 }
1920 break;
1921 }
1922
1923 status = PyList_Append(list, item);
1924 Py_DECREF(item);
1925 if (status < 0)
1926 goto error;
1927
1928 if (state.ptr == state.start)
1929 state.start = (void*) ((char*) state.ptr + state.charsize);
1930 else
1931 state.start = state.ptr;
1932
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001933 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001934
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001935 state_fini(&state);
1936 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001937
1938error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001939 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001940 state_fini(&state);
1941 return NULL;
1942
Guido van Rossumb700df92000-03-31 14:59:30 +00001943}
1944
#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    /* return a callable-iterator that calls the "search" method of
       a new scanner object until it returns None.  the arguments are
       passed on to pattern_scanner */

    PyObject* scanner;
    PyObject* search;
    PyObject* iterator = NULL;

    scanner = pattern_scanner(pattern, args);
    if (scanner) {
        search = PyObject_GetAttrString(scanner, "search");
        Py_DECREF(scanner);
        if (search) {
            iterator = PyCallIter_New(search, Py_None);
            Py_DECREF(search);
        }
    }

    return iterator;
}
#endif
1968
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001969static PyObject*
1970pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
1971{
1972 SRE_STATE state;
1973 PyObject* list;
1974 PyObject* item;
1975 int status;
1976 int n;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001977 int i;
1978 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001979
1980 PyObject* string;
1981 int maxsplit = 0;
1982 static char* kwlist[] = { "source", "maxsplit", NULL };
1983 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
1984 &string, &maxsplit))
1985 return NULL;
1986
1987 string = state_init(&state, self, string, 0, INT_MAX);
1988 if (!string)
1989 return NULL;
1990
1991 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001992 if (!list) {
1993 state_fini(&state);
1994 return NULL;
1995 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001996
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001997 n = 0;
1998 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001999
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002000 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002001
2002 state_reset(&state);
2003
2004 state.ptr = state.start;
2005
2006 if (state.charsize == 1) {
2007 status = sre_search(&state, PatternObject_GetCode(self));
2008 } else {
2009#if defined(HAVE_UNICODE)
2010 status = sre_usearch(&state, PatternObject_GetCode(self));
2011#endif
2012 }
2013
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002014 if (status <= 0) {
2015 if (status == 0)
2016 break;
2017 pattern_error(status);
2018 goto error;
2019 }
2020
2021 if (state.start == state.ptr) {
2022 if (last == state.end)
2023 break;
2024 /* skip one character */
2025 state.start = (void*) ((char*) state.ptr + state.charsize);
2026 continue;
2027 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002028
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002029 /* get segment before this match */
2030 item = PySequence_GetSlice(
2031 string, STATE_OFFSET(&state, last),
2032 STATE_OFFSET(&state, state.start)
2033 );
2034 if (!item)
2035 goto error;
2036 status = PyList_Append(list, item);
2037 Py_DECREF(item);
2038 if (status < 0)
2039 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002040
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002041 /* add groups (if any) */
2042 for (i = 0; i < self->groups; i++) {
2043 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002044 if (!item)
2045 goto error;
2046 status = PyList_Append(list, item);
2047 Py_DECREF(item);
2048 if (status < 0)
2049 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002050 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002051
2052 n = n + 1;
2053
2054 last = state.start = state.ptr;
2055
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002056 }
2057
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002058 /* get segment following last match (even if empty) */
2059 item = PySequence_GetSlice(
2060 string, STATE_OFFSET(&state, last), state.endpos
2061 );
2062 if (!item)
2063 goto error;
2064 status = PyList_Append(list, item);
2065 Py_DECREF(item);
2066 if (status < 0)
2067 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002068
2069 state_fini(&state);
2070 return list;
2071
2072error:
2073 Py_DECREF(list);
2074 state_fini(&state);
2075 return NULL;
2076
2077}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002078
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002079static PyObject*
2080pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2081 int count, int subn)
2082{
2083 SRE_STATE state;
2084 PyObject* list;
2085 PyObject* item;
2086 PyObject* filter;
2087 PyObject* args;
2088 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002089 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002090 int status;
2091 int n;
2092 int i, b, e;
2093 int filter_is_callable;
2094
Fredrik Lundhdac58492001-10-21 21:48:30 +00002095 if (PyCallable_Check(template)) {
2096 /* sub/subn takes either a function or a template */
2097 filter = template;
2098 Py_INCREF(filter);
2099 filter_is_callable = 1;
2100 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002101 /* if not callable, check if it's a literal string */
2102 int literal;
2103 ptr = getstring(template, &n, &b);
2104 if (ptr) {
2105 if (b == 1) {
2106 literal = sre_literal_template(ptr, n);
2107 } else {
2108#if defined(HAVE_UNICODE)
2109 literal = sre_uliteral_template(ptr, n);
2110#endif
2111 }
2112 } else {
2113 PyErr_Clear();
2114 literal = 0;
2115 }
2116 if (literal) {
2117 filter = template;
2118 Py_INCREF(filter);
2119 filter_is_callable = 0;
2120 } else {
2121 /* not a literal; hand it over to the template compiler */
2122 filter = call(
2123 SRE_MODULE, "_subx",
2124 Py_BuildValue("OO", self, template)
2125 );
2126 if (!filter)
2127 return NULL;
2128 filter_is_callable = PyCallable_Check(filter);
2129 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002130 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002131
2132 string = state_init(&state, self, string, 0, INT_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002133 if (!string) {
2134 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002135 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002136 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002137
2138 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002139 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002140 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002141 state_fini(&state);
2142 return NULL;
2143 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002144
2145 n = i = 0;
2146
2147 while (!count || n < count) {
2148
2149 state_reset(&state);
2150
2151 state.ptr = state.start;
2152
2153 if (state.charsize == 1) {
2154 status = sre_search(&state, PatternObject_GetCode(self));
2155 } else {
2156#if defined(HAVE_UNICODE)
2157 status = sre_usearch(&state, PatternObject_GetCode(self));
2158#endif
2159 }
2160
2161 if (status <= 0) {
2162 if (status == 0)
2163 break;
2164 pattern_error(status);
2165 goto error;
2166 }
2167
2168 b = STATE_OFFSET(&state, state.start);
2169 e = STATE_OFFSET(&state, state.ptr);
2170
2171 if (i < b) {
2172 /* get segment before this match */
2173 item = PySequence_GetSlice(string, i, b);
2174 if (!item)
2175 goto error;
2176 status = PyList_Append(list, item);
2177 Py_DECREF(item);
2178 if (status < 0)
2179 goto error;
2180
2181 } else if (i == b && i == e && n > 0)
2182 /* ignore empty match on latest position */
2183 goto next;
2184
2185 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002186 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002187 match = pattern_new_match(self, &state, 1);
2188 if (!match)
2189 goto error;
2190 args = Py_BuildValue("(O)", match);
2191 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002192 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002193 goto error;
2194 }
2195 item = PyObject_CallObject(filter, args);
2196 Py_DECREF(args);
2197 Py_DECREF(match);
2198 if (!item)
2199 goto error;
2200 } else {
2201 /* filter is literal string */
2202 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002203 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002204 }
2205
2206 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002207 if (item != Py_None) {
2208 status = PyList_Append(list, item);
2209 Py_DECREF(item);
2210 if (status < 0)
2211 goto error;
2212 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002213
2214 i = e;
2215 n = n + 1;
2216
2217next:
2218 /* move on */
2219 if (state.ptr == state.start)
2220 state.start = (void*) ((char*) state.ptr + state.charsize);
2221 else
2222 state.start = state.ptr;
2223
2224 }
2225
2226 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002227 if (i < state.endpos) {
2228 item = PySequence_GetSlice(string, i, state.endpos);
2229 if (!item)
2230 goto error;
2231 status = PyList_Append(list, item);
2232 Py_DECREF(item);
2233 if (status < 0)
2234 goto error;
2235 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002236
2237 state_fini(&state);
2238
Guido van Rossum4e173842001-12-07 04:25:10 +00002239 Py_DECREF(filter);
2240
Fredrik Lundhdac58492001-10-21 21:48:30 +00002241 /* convert list to single string (also removes list) */
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00002242 item = join_list(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002243
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002244 if (!item)
2245 return NULL;
2246
2247 if (subn)
2248 return Py_BuildValue("Ni", item, n);
2249
2250 return item;
2251
2252error:
2253 Py_DECREF(list);
2254 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002255 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002256 return NULL;
2257
2258}
2259
2260static PyObject*
2261pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2262{
2263 PyObject* template;
2264 PyObject* string;
2265 int count = 0;
2266 static char* kwlist[] = { "repl", "string", "count", NULL };
2267 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2268 &template, &string, &count))
2269 return NULL;
2270
2271 return pattern_subx(self, template, string, count, 0);
2272}
2273
2274static PyObject*
2275pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2276{
2277 PyObject* template;
2278 PyObject* string;
2279 int count = 0;
2280 static char* kwlist[] = { "repl", "string", "count", NULL };
2281 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2282 &template, &string, &count))
2283 return NULL;
2284
2285 return pattern_subx(self, template, string, count, 1);
2286}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002287
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002288static PyObject*
2289pattern_copy(PatternObject* self, PyObject* args)
2290{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002291#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002292 PatternObject* copy;
2293 int offset;
2294
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002295 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2296 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002297
2298 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2299 if (!copy)
2300 return NULL;
2301
2302 offset = offsetof(PatternObject, groups);
2303
2304 Py_XINCREF(self->groupindex);
2305 Py_XINCREF(self->indexgroup);
2306 Py_XINCREF(self->pattern);
2307
2308 memcpy((char*) copy + offset, (char*) self + offset,
2309 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2310
2311 return (PyObject*) copy;
2312#else
2313 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2314 return NULL;
2315#endif
2316}
2317
2318static PyObject*
2319pattern_deepcopy(PatternObject* self, PyObject* args)
2320{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002321#ifdef USE_BUILTIN_COPY
2322 PatternObject* copy;
2323
2324 PyObject* memo;
2325 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2326 return NULL;
2327
2328 copy = (PatternObject*) pattern_copy(self, Py_None);
2329 if (!copy)
2330 return NULL;
2331
2332 if (!deepcopy(&copy->groupindex, memo) ||
2333 !deepcopy(&copy->indexgroup, memo) ||
2334 !deepcopy(&copy->pattern, memo)) {
2335 Py_DECREF(copy);
2336 return NULL;
2337 }
2338
2339#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002340 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2341 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002342#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002343}
2344
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002345static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002346 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2347 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2348 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2349 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2350 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2351 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002352#if PY_VERSION_HEX >= 0x02020000
2353 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS},
2354#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002355 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002356 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2357 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002358 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002359};
2360
2361static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002362pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002363{
2364 PyObject* res;
2365
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002366 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002367
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002368 if (res)
2369 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002370
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002371 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002372
2373 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002374 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002375 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002376 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002377 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002378
2379 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002380 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002381
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002382 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002383 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002385 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002386 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002387 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002388 }
2389
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002390 PyErr_SetString(PyExc_AttributeError, name);
2391 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002392}
2393
2394statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002395 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002396 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002397 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002398 (destructor)pattern_dealloc, /*tp_dealloc*/
2399 0, /*tp_print*/
2400 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002401};
2402
2403/* -------------------------------------------------------------------- */
2404/* match methods */
2405
2406static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002407match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002408{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002409 Py_XDECREF(self->regs);
2410 Py_XDECREF(self->string);
2411 Py_DECREF(self->pattern);
2412 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002413}
2414
2415static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002416match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002417{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002418 if (index < 0 || index >= self->groups) {
2419 /* raise IndexError if we were given a bad group number */
2420 PyErr_SetString(
2421 PyExc_IndexError,
2422 "no such group"
2423 );
2424 return NULL;
2425 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002426
Fredrik Lundh6f013982000-07-03 18:44:21 +00002427 index *= 2;
2428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002429 if (self->string == Py_None || self->mark[index] < 0) {
2430 /* return default value if the string or group is undefined */
2431 Py_INCREF(def);
2432 return def;
2433 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002434
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002435 return PySequence_GetSlice(
2436 self->string, self->mark[index], self->mark[index+1]
2437 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002438}
2439
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002440static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002441match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002442{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002443 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002444
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002445 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002446 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002447
Fredrik Lundh6f013982000-07-03 18:44:21 +00002448 i = -1;
2449
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002450 if (self->pattern->groupindex) {
2451 index = PyObject_GetItem(self->pattern->groupindex, index);
2452 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002453 if (PyInt_Check(index))
2454 i = (int) PyInt_AS_LONG(index);
2455 Py_DECREF(index);
2456 } else
2457 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002458 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002459
2460 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002461}
2462
2463static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002464match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002465{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002466 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002467}
2468
2469static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002470match_expand(MatchObject* self, PyObject* args)
2471{
2472 PyObject* template;
2473 if (!PyArg_ParseTuple(args, "O:expand", &template))
2474 return NULL;
2475
2476 /* delegate to Python code */
2477 return call(
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002478 SRE_MODULE, "_expand",
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002479 Py_BuildValue("OOO", self->pattern, self, template)
2480 );
2481}
2482
2483static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002484match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002485{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002486 PyObject* result;
2487 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002488
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002489 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002490
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002491 switch (size) {
2492 case 0:
2493 result = match_getslice(self, Py_False, Py_None);
2494 break;
2495 case 1:
2496 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2497 break;
2498 default:
2499 /* fetch multiple items */
2500 result = PyTuple_New(size);
2501 if (!result)
2502 return NULL;
2503 for (i = 0; i < size; i++) {
2504 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002505 self, PyTuple_GET_ITEM(args, i), Py_None
2506 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002507 if (!item) {
2508 Py_DECREF(result);
2509 return NULL;
2510 }
2511 PyTuple_SET_ITEM(result, i, item);
2512 }
2513 break;
2514 }
2515 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002516}
2517
2518static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002519match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002520{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002521 PyObject* result;
2522 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002523
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002524 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002525 static char* kwlist[] = { "default", NULL };
2526 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002527 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002528
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002529 result = PyTuple_New(self->groups-1);
2530 if (!result)
2531 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002532
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002533 for (index = 1; index < self->groups; index++) {
2534 PyObject* item;
2535 item = match_getslice_by_index(self, index, def);
2536 if (!item) {
2537 Py_DECREF(result);
2538 return NULL;
2539 }
2540 PyTuple_SET_ITEM(result, index-1, item);
2541 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002542
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002543 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002544}
2545
2546static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002547match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002548{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002549 PyObject* result;
2550 PyObject* keys;
2551 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002552
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002553 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002554 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002555 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002556 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002557
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002558 result = PyDict_New();
2559 if (!result || !self->pattern->groupindex)
2560 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002561
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002562 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002563 if (!keys)
2564 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002565
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002566 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002567 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002568 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002569 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002570 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002571 if (!key)
2572 goto failed;
2573 value = match_getslice(self, key, def);
2574 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002575 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002576 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002577 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002578 status = PyDict_SetItem(result, key, value);
2579 Py_DECREF(value);
2580 if (status < 0)
2581 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002582 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002583
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002584 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002585
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002586 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002587
2588failed:
2589 Py_DECREF(keys);
2590 Py_DECREF(result);
2591 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002592}
2593
2594static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002595match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002596{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002597 int index;
2598
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002599 PyObject* index_ = Py_False; /* zero */
2600 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2601 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002602
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002603 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002605 if (index < 0 || index >= self->groups) {
2606 PyErr_SetString(
2607 PyExc_IndexError,
2608 "no such group"
2609 );
2610 return NULL;
2611 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002612
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002613 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002614 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002615}
2616
2617static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002618match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002619{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002620 int index;
2621
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002622 PyObject* index_ = Py_False; /* zero */
2623 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2624 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002625
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002626 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002627
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002628 if (index < 0 || index >= self->groups) {
2629 PyErr_SetString(
2630 PyExc_IndexError,
2631 "no such group"
2632 );
2633 return NULL;
2634 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002635
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002636 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002637 return Py_BuildValue("i", self->mark[index*2+1]);
2638}
2639
2640LOCAL(PyObject*)
2641_pair(int i1, int i2)
2642{
2643 PyObject* pair;
2644 PyObject* item;
2645
2646 pair = PyTuple_New(2);
2647 if (!pair)
2648 return NULL;
2649
2650 item = PyInt_FromLong(i1);
2651 if (!item)
2652 goto error;
2653 PyTuple_SET_ITEM(pair, 0, item);
2654
2655 item = PyInt_FromLong(i2);
2656 if (!item)
2657 goto error;
2658 PyTuple_SET_ITEM(pair, 1, item);
2659
2660 return pair;
2661
2662 error:
2663 Py_DECREF(pair);
2664 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002665}
2666
2667static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002668match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002669{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002670 int index;
2671
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002672 PyObject* index_ = Py_False; /* zero */
2673 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2674 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002675
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002676 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002677
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002678 if (index < 0 || index >= self->groups) {
2679 PyErr_SetString(
2680 PyExc_IndexError,
2681 "no such group"
2682 );
2683 return NULL;
2684 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002685
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002686 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002687 return _pair(self->mark[index*2], self->mark[index*2+1]);
2688}
2689
2690static PyObject*
2691match_regs(MatchObject* self)
2692{
2693 PyObject* regs;
2694 PyObject* item;
2695 int index;
2696
2697 regs = PyTuple_New(self->groups);
2698 if (!regs)
2699 return NULL;
2700
2701 for (index = 0; index < self->groups; index++) {
2702 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2703 if (!item) {
2704 Py_DECREF(regs);
2705 return NULL;
2706 }
2707 PyTuple_SET_ITEM(regs, index, item);
2708 }
2709
2710 Py_INCREF(regs);
2711 self->regs = regs;
2712
2713 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002714}
2715
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002716static PyObject*
2717match_copy(MatchObject* self, PyObject* args)
2718{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002719#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002720 MatchObject* copy;
2721 int slots, offset;
2722
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002723 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2724 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002725
2726 slots = 2 * (self->pattern->groups+1);
2727
2728 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2729 if (!copy)
2730 return NULL;
2731
2732 /* this value a constant, but any compiler should be able to
2733 figure that out all by itself */
2734 offset = offsetof(MatchObject, string);
2735
2736 Py_XINCREF(self->pattern);
2737 Py_XINCREF(self->string);
2738 Py_XINCREF(self->regs);
2739
2740 memcpy((char*) copy + offset, (char*) self + offset,
2741 sizeof(MatchObject) + slots * sizeof(int) - offset);
2742
2743 return (PyObject*) copy;
2744#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002745 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002746 return NULL;
2747#endif
2748}
2749
2750static PyObject*
2751match_deepcopy(MatchObject* self, PyObject* args)
2752{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002753#ifdef USE_BUILTIN_COPY
2754 MatchObject* copy;
2755
2756 PyObject* memo;
2757 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2758 return NULL;
2759
2760 copy = (MatchObject*) match_copy(self, Py_None);
2761 if (!copy)
2762 return NULL;
2763
2764 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2765 !deepcopy(&copy->string, memo) ||
2766 !deepcopy(&copy->regs, memo)) {
2767 Py_DECREF(copy);
2768 return NULL;
2769 }
2770
2771#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002772 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2773 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002774#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002775}
2776
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002777static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002778 {"group", (PyCFunction) match_group, METH_VARARGS},
2779 {"start", (PyCFunction) match_start, METH_VARARGS},
2780 {"end", (PyCFunction) match_end, METH_VARARGS},
2781 {"span", (PyCFunction) match_span, METH_VARARGS},
2782 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2783 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2784 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002785 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2786 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002787 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002788};
2789
2790static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002791match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002792{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002793 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002794
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002795 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2796 if (res)
2797 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002798
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002799 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002800
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002801 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002802 if (self->lastindex >= 0)
2803 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002804 Py_INCREF(Py_None);
2805 return Py_None;
2806 }
2807
2808 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002809 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002810 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002811 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002812 );
2813 if (result)
2814 return result;
2815 PyErr_Clear();
2816 }
2817 Py_INCREF(Py_None);
2818 return Py_None;
2819 }
2820
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002821 if (!strcmp(name, "string")) {
2822 if (self->string) {
2823 Py_INCREF(self->string);
2824 return self->string;
2825 } else {
2826 Py_INCREF(Py_None);
2827 return Py_None;
2828 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002829 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002830
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002831 if (!strcmp(name, "regs")) {
2832 if (self->regs) {
2833 Py_INCREF(self->regs);
2834 return self->regs;
2835 } else
2836 return match_regs(self);
2837 }
2838
2839 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002840 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002841 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002842 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002843
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002844 if (!strcmp(name, "pos"))
2845 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002846
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002847 if (!strcmp(name, "endpos"))
2848 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002849
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002850 PyErr_SetString(PyExc_AttributeError, name);
2851 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002852}
2853
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2856
2857statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002858 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002859 0, "_" SRE_MODULE ".SRE_Match",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002860 sizeof(MatchObject), sizeof(int),
2861 (destructor)match_dealloc, /*tp_dealloc*/
2862 0, /*tp_print*/
2863 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002864};
2865
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002866/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002867/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002868
2869static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002870scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002871{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002872 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002873 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002874 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002875}
2876
2877static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002878scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002879{
2880 SRE_STATE* state = &self->state;
2881 PyObject* match;
2882 int status;
2883
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002884 state_reset(state);
2885
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002886 state->ptr = state->start;
2887
2888 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002889 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002890 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002891#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002892 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002893#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002894 }
2895
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002896 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002897 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002898
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002899 if (status == 0 || state->ptr == state->start)
2900 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002901 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002902 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002903
2904 return match;
2905}
2906
2907
2908static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002909scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002910{
2911 SRE_STATE* state = &self->state;
2912 PyObject* match;
2913 int status;
2914
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002915 state_reset(state);
2916
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002917 state->ptr = state->start;
2918
2919 if (state->charsize == 1) {
2920 status = sre_search(state, PatternObject_GetCode(self->pattern));
2921 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002922#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002923 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002924#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002925 }
2926
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002927 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002928 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002929
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002930 if (status == 0 || state->ptr == state->start)
2931 state->start = (void*) ((char*) state->ptr + state->charsize);
2932 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002933 state->start = state->ptr;
2934
2935 return match;
2936}
2937
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002938static PyMethodDef scanner_methods[] = {
Neal Norwitzbb2769f2002-03-31 15:46:00 +00002939 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
2940 /* METH_OLDARGS is not in Python 1.5.2 */
2941 {"match", (PyCFunction) scanner_match, 0},
2942 {"search", (PyCFunction) scanner_search, 0},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002943 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002944};
2945
2946static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002947scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002948{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002949 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002950
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002951 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2952 if (res)
2953 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002954
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002955 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002956
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002957 /* attributes */
2958 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002959 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002960 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002961 }
2962
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002963 PyErr_SetString(PyExc_AttributeError, name);
2964 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002965}
2966
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002967statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002968 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002969 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002970 sizeof(ScannerObject), 0,
2971 (destructor)scanner_dealloc, /*tp_dealloc*/
2972 0, /*tp_print*/
2973 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002974};
2975
Guido van Rossumb700df92000-03-31 14:59:30 +00002976static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002977 {"compile", _compile, METH_VARARGS},
2978 {"getcodesize", sre_codesize, METH_VARARGS},
2979 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002980 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002981};
2982
Mark Hammond8235ea12002-07-19 06:55:41 +00002983PyMODINIT_FUNC init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002984{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002985 PyObject* m;
2986 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002987 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002988
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002989 /* Patch object types */
2990 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002991 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002992
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002993 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002994 d = PyModule_GetDict(m);
2995
Fredrik Lundh21009b92001-09-18 18:47:09 +00002996 x = PyInt_FromLong(SRE_MAGIC);
2997 if (x) {
2998 PyDict_SetItemString(d, "MAGIC", x);
2999 Py_DECREF(x);
3000 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003001
Fredrik Lundh21009b92001-09-18 18:47:09 +00003002 x = PyString_FromString(copyright);
3003 if (x) {
3004 PyDict_SetItemString(d, "copyright", x);
3005 Py_DECREF(x);
3006 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003007}
3008
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003009#endif /* !defined(SRE_RECURSIVE) */