blob: f4dbef042f13734284f3335b56151ac8f1dc2860 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00007 * 1999-10-24 fl created (based on existing template matcher code)
Fredrik Lundhebc37b22000-10-28 19:30:41 +00008 * 2000-03-06 fl first alpha, sort of
9 * 2000-06-30 fl added fast search optimization
10 * 2000-06-30 fl added assert (lookahead) primitives, etc
11 * 2000-07-02 fl added charset optimizations, etc
Fredrik Lundh5644b7f2000-09-21 17:03:25 +000012 * 2000-07-03 fl store code in pattern object, lookbehind, etc
13 * 2000-07-08 fl added regs attribute
Fredrik Lundhebc37b22000-10-28 19:30:41 +000014 * 2000-07-21 fl reset lastindex in scanner methods
15 * 2000-08-01 fl fixes for 1.6b1
Fredrik Lundh5644b7f2000-09-21 17:03:25 +000016 * 2000-08-03 fl added recursion limit
17 * 2000-08-07 fl use PyOS_CheckStack() if available
18 * 2000-08-08 fl changed findall to return empty strings instead of None
19 * 2000-08-27 fl properly propagate memory errors
20 * 2000-09-02 fl return -1 instead of None for start/end/span
21 * 2000-09-20 fl added expand method
22 * 2000-09-21 fl don't use the buffer interface for unicode strings
Fredrik Lundh562586e2000-10-03 20:43:34 +000023 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
Fredrik Lundhebc37b22000-10-28 19:30:41 +000024 * 2000-10-24 fl really fixed assert_not; reset groups in findall
Fredrik Lundh770617b2001-01-14 15:06:11 +000025 * 2000-12-21 fl fixed memory leak in groupdict
26 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +000027 * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
Fredrik Lundh6f5cba62001-01-16 07:05:29 +000028 * 2001-01-16 fl fixed memory leak in pattern destructor
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +000029 * 2001-03-20 fl lots of fixes for 2.1b2
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000030 * 2001-04-15 fl export copyright as Python attribute, not global
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000031 * 2001-04-28 fl added __copy__ methods (work in progress)
32 * 2001-05-14 fl fixes for 1.5.2
Fredrik Lundhf71ae462001-07-02 17:04:48 +000033 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
Fredrik Lundh397a6542001-10-18 19:30:16 +000034 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
Fredrik Lundh971e78b2001-10-20 17:48:46 +000035 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
Fredrik Lundhbec95b92001-10-21 16:47:57 +000036 * 2001-10-21 fl added sub/subn primitive
Fredrik Lundh6de22ef2001-10-22 21:18:08 +000037 * 2001-10-22 fl check for literal sub/subn templates
Fredrik Lundh703ce812001-10-24 22:16:30 +000038 * 2001-10-24 fl added finditer primitive (for 2.2 only)
Fredrik Lundh82b23072001-12-09 16:13:15 +000039 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
Guido van Rossumb700df92000-03-31 14:59:30 +000040 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000041 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000042 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000043 * This version of the SRE library can be redistributed under CNRI's
44 * Python 1.6 license. For any other use, please contact Secret Labs
45 * AB (info@pythonware.com).
46 *
Guido van Rossumb700df92000-03-31 14:59:30 +000047 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000048 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000049 * other compatibility work.
50 */
51
52#ifndef SRE_RECURSIVE
53
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000054static char copyright[] =
Fredrik Lundhbec95b92001-10-21 16:47:57 +000055 " SRE 2.2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000056
57#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000058#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000059
60#include "sre.h"
61
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000062#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000063
/* name of this module, minus the leading underscore (may be
   overridden from the build command line) */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* defining this one enables tracing */
#undef VERBOSE

/* defining HAVE_UNICODE enables unicode support (default under 1.6a1
   and later; from 2.2 on only when Py_USING_UNICODE is defined) */
#if PY_VERSION_HEX >= 0x01060000 && \
    (PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE))
#define HAVE_UNICODE
#endif

/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings).  the
   fixed limit is only used when USE_STACKCHECK is not defined */

#ifndef USE_STACKCHECK
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

/* interpreters older than 1.6 get PyObject_DEL mapped onto PyMem_DEL */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif
107
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000108/* -------------------------------------------------------------------- */
109
/* LOCAL(type) declares a file-local helper returning 'type', using the
   cheapest calling convention the compiler offers */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE takes a parenthesised printf argument list, i.e. it is called
   as TRACE(("fmt", args)); it expands to nothing unless VERBOSE is
   defined */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
132
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000133/* -------------------------------------------------------------------- */
134/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000135
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000136/* default character predicates (run sre_chars.py to regenerate tables) */
137
/* bit flags stored for each character in sre_char_info[] */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* per-character flag table: an OR of the SRE_*_MASK bits above, indexed
   by character code 0..127 (16 entries per row) */
static char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lowercase mapping for character codes 0..127: identity except that
   65..90 ('A'..'Z') map to 97..122 ('a'..'z') */
static char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* look up the flag bits selected by 'mask' for character 'ch'.  the
   range test is done on the value converted to unsigned, so a negative
   signed argument yields 0 instead of indexing in front of the table.
   anything outside 0..127 has no flags set.  note that 'ch' is
   evaluated more than once */
#define SRE_CHAR_INFO_TEST(ch, mask)\
    ((unsigned int) (ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

/* each predicate yields the (non-zero) mask bit if set, 0 otherwise */
#define SRE_IS_DIGIT(ch)\
    SRE_CHAR_INFO_TEST((ch), SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch)\
    SRE_CHAR_INFO_TEST((ch), SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch)\
    SRE_CHAR_INFO_TEST((ch), SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch)\
    SRE_CHAR_INFO_TEST((ch), SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch)\
    SRE_CHAR_INFO_TEST((ch), SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000174
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000175static unsigned int sre_lower(unsigned int ch)
176{
177 return ((ch) < 128 ? sre_char_lower[ch] : ch);
178}
179
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000180/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
/* locale-aware predicates built on <ctype.h>.  the range test is done
   on the value converted to unsigned, so only 0..255 ever reaches the
   ctype functions; anything else (including a negative signed
   argument) yields 0.  note that 'ch' is evaluated more than once */
#define SRE_LOC_IS_DIGIT(ch)\
    ((unsigned int) (ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch)\
    ((unsigned int) (ch) < 256 ? isspace((ch)) : 0)
/* only '\n' counts as a line break here, whatever the locale */
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch)\
    ((unsigned int) (ch) < 256 ? isalnum((ch)) : 0)
/* word characters: locale alphanumerics plus underscore (yields 0/1) */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
187
static unsigned int sre_lower_locale(unsigned int ch)
{
    /* lowercase via the C library's tolower(); codes of 256 and above
       are never handed to tolower() and are returned unchanged */
    if (ch < 256)
        return tolower(ch);
    return ch;
}
192
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000193/* unicode-specific character predicates */
194
195#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000196
/* every unicode predicate first converts its argument to Py_UNICODE */
#define SRE_UNI_CHAR(ch) ((Py_UNICODE)(ch))

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT(SRE_UNI_CHAR(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(SRE_UNI_CHAR(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(SRE_UNI_CHAR(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(SRE_UNI_CHAR(ch))
/* word characters: unicode alphanumerics plus underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000202
203static unsigned int sre_lower_unicode(unsigned int ch)
204{
205 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
206}
207
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000208#endif
209
Guido van Rossumb700df92000-03-31 14:59:30 +0000210LOCAL(int)
211sre_category(SRE_CODE category, unsigned int ch)
212{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000213 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000214
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000215 case SRE_CATEGORY_DIGIT:
216 return SRE_IS_DIGIT(ch);
217 case SRE_CATEGORY_NOT_DIGIT:
218 return !SRE_IS_DIGIT(ch);
219 case SRE_CATEGORY_SPACE:
220 return SRE_IS_SPACE(ch);
221 case SRE_CATEGORY_NOT_SPACE:
222 return !SRE_IS_SPACE(ch);
223 case SRE_CATEGORY_WORD:
224 return SRE_IS_WORD(ch);
225 case SRE_CATEGORY_NOT_WORD:
226 return !SRE_IS_WORD(ch);
227 case SRE_CATEGORY_LINEBREAK:
228 return SRE_IS_LINEBREAK(ch);
229 case SRE_CATEGORY_NOT_LINEBREAK:
230 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000231
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000232 case SRE_CATEGORY_LOC_WORD:
233 return SRE_LOC_IS_WORD(ch);
234 case SRE_CATEGORY_LOC_NOT_WORD:
235 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000236
237#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000238 case SRE_CATEGORY_UNI_DIGIT:
239 return SRE_UNI_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_NOT_DIGIT:
241 return !SRE_UNI_IS_DIGIT(ch);
242 case SRE_CATEGORY_UNI_SPACE:
243 return SRE_UNI_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_NOT_SPACE:
245 return !SRE_UNI_IS_SPACE(ch);
246 case SRE_CATEGORY_UNI_WORD:
247 return SRE_UNI_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_NOT_WORD:
249 return !SRE_UNI_IS_WORD(ch);
250 case SRE_CATEGORY_UNI_LINEBREAK:
251 return SRE_UNI_IS_LINEBREAK(ch);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
253 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000254#else
255 case SRE_CATEGORY_UNI_DIGIT:
256 return SRE_IS_DIGIT(ch);
257 case SRE_CATEGORY_UNI_NOT_DIGIT:
258 return !SRE_IS_DIGIT(ch);
259 case SRE_CATEGORY_UNI_SPACE:
260 return SRE_IS_SPACE(ch);
261 case SRE_CATEGORY_UNI_NOT_SPACE:
262 return !SRE_IS_SPACE(ch);
263 case SRE_CATEGORY_UNI_WORD:
264 return SRE_LOC_IS_WORD(ch);
265 case SRE_CATEGORY_UNI_NOT_WORD:
266 return !SRE_LOC_IS_WORD(ch);
267 case SRE_CATEGORY_UNI_LINEBREAK:
268 return SRE_IS_LINEBREAK(ch);
269 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
270 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000271#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000272 }
273 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000274}
275
276/* helpers */
277
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000279mark_fini(SRE_STATE* state)
280{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000281 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000282 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000283 state->mark_stack = NULL;
284 }
285 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000286}
287
288static int
289mark_save(SRE_STATE* state, int lo, int hi)
290{
291 void* stack;
292 int size;
293 int minsize, newsize;
294
295 if (hi <= lo)
296 return 0;
297
298 size = (hi - lo) + 1;
299
300 newsize = state->mark_stack_size;
301 minsize = state->mark_stack_base + size;
302
303 if (newsize < minsize) {
304 /* create new stack */
305 if (!newsize) {
306 newsize = 512;
307 if (newsize < minsize)
308 newsize = minsize;
309 TRACE(("allocate stack %d\n", newsize));
310 stack = malloc(sizeof(void*) * newsize);
311 } else {
312 /* grow the stack */
313 while (newsize < minsize)
314 newsize += newsize;
315 TRACE(("grow stack to %d\n", newsize));
316 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
317 }
318 if (!stack) {
319 mark_fini(state);
320 return SRE_ERROR_MEMORY;
321 }
322 state->mark_stack = stack;
323 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000324 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000325
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000326 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000327
328 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
329 size * sizeof(void*));
330
331 state->mark_stack_base += size;
332
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000333 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000334}
335
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000336static int
337mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000338{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000339 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000341 if (hi <= lo)
342 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000344 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000345
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000346 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000347
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000348 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000349
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000350 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
351 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000354}
355
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000356/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000357
/* names used by the engine section when compiled for 8-bit strings */
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_CHARSET sre_charset
#define SRE_COUNT sre_count
#define SRE_INFO sre_info
#define SRE_LITERAL_TEMPLATE sre_literal_template
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#ifdef HAVE_UNICODE

/* include this file into itself with the 8-bit names in effect.
   SRE_RECURSIVE is defined during the nested pass, so everything
   guarded by "#ifndef SRE_RECURSIVE" is left out of it */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

/* drop the 8-bit names again... */
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_LITERAL_TEMPLATE
#undef SRE_INFO
#undef SRE_COUNT
#undef SRE_CHARSET
#undef SRE_AT
#undef SRE_CHAR

/* ...and generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_CHARSET sre_ucharset
#define SRE_COUNT sre_ucount
#define SRE_INFO sre_uinfo
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000393
394#endif /* SRE_RECURSIVE */
395
396/* -------------------------------------------------------------------- */
397/* String matching engine */
398
399/* the following section is compiled twice, with different character
400 settings */
401
402LOCAL(int)
403SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
404{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000405 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000412 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000414
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000415 case SRE_AT_BEGINNING_LINE:
416 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000417 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000418
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000419 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000420 return (((void*) (ptr+1) == state->end &&
421 SRE_IS_LINEBREAK((int) ptr[0])) ||
422 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000423
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000424 case SRE_AT_END_LINE:
425 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000426 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000427
Fredrik Lundh770617b2001-01-14 15:06:11 +0000428 case SRE_AT_END_STRING:
429 return ((void*) ptr == state->end);
430
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000431 case SRE_AT_BOUNDARY:
432 if (state->beginning == state->end)
433 return 0;
434 that = ((void*) ptr > state->beginning) ?
435 SRE_IS_WORD((int) ptr[-1]) : 0;
436 this = ((void*) ptr < state->end) ?
437 SRE_IS_WORD((int) ptr[0]) : 0;
438 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000439
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000440 case SRE_AT_NON_BOUNDARY:
441 if (state->beginning == state->end)
442 return 0;
443 that = ((void*) ptr > state->beginning) ?
444 SRE_IS_WORD((int) ptr[-1]) : 0;
445 this = ((void*) ptr < state->end) ?
446 SRE_IS_WORD((int) ptr[0]) : 0;
447 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000448
449 case SRE_AT_LOC_BOUNDARY:
450 if (state->beginning == state->end)
451 return 0;
452 that = ((void*) ptr > state->beginning) ?
453 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
454 this = ((void*) ptr < state->end) ?
455 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
456 return this != that;
457
458 case SRE_AT_LOC_NON_BOUNDARY:
459 if (state->beginning == state->end)
460 return 0;
461 that = ((void*) ptr > state->beginning) ?
462 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
463 this = ((void*) ptr < state->end) ?
464 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
465 return this == that;
466
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000467#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000468 case SRE_AT_UNI_BOUNDARY:
469 if (state->beginning == state->end)
470 return 0;
471 that = ((void*) ptr > state->beginning) ?
472 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
473 this = ((void*) ptr < state->end) ?
474 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
475 return this != that;
476
477 case SRE_AT_UNI_NON_BOUNDARY:
478 if (state->beginning == state->end)
479 return 0;
480 that = ((void*) ptr > state->beginning) ?
481 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
482 this = ((void*) ptr < state->end) ?
483 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
484 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000485#endif
486
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000487 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000488
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000489 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000490}
491
492LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000493SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000494{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000495 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000496
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000497 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000498
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000499 for (;;) {
500 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000501
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000502 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000503 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000504 if (ch == set[0])
505 return ok;
506 set++;
507 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000508
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000509 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000510 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000511 if (set[0] <= ch && ch <= set[1])
512 return ok;
513 set += 2;
514 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000515
Fredrik Lundh3562f112000-07-02 12:00:07 +0000516 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000517 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000518 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
519 return ok;
520 set += 16;
521 break;
522
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000523 case SRE_OP_BIGCHARSET:
524 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
525 {
526 int count, block;
527 count = *(set++);
528 block = ((unsigned char*)set)[ch >> 8];
529 set += 128;
530 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
531 return ok;
532 set += count*16;
533 break;
534 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000535
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000536 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000537 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000538 if (sre_category(set[0], (int) ch))
539 return ok;
540 set += 1;
541 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000542
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000543 case SRE_OP_NEGATE:
544 ok = !ok;
545 break;
546
547 case SRE_OP_FAILURE:
548 return !ok;
549
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000550 default:
551 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000552 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000553 return 0;
554 }
555 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000556}
557
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000558LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
559
560LOCAL(int)
561SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
562{
563 SRE_CODE chr;
564 SRE_CHAR* ptr = state->ptr;
565 SRE_CHAR* end = state->end;
566 int i;
567
568 /* adjust end */
569 if (maxcount < end - ptr && maxcount != 65535)
570 end = ptr + maxcount;
571
572 switch (pattern[0]) {
573
574 case SRE_OP_ANY:
575 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000576 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000577 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
578 ptr++;
579 break;
580
581 case SRE_OP_ANY_ALL:
582 /* repeated dot wildcare. skip to the end of the target
583 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000584 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000585 ptr = end;
586 break;
587
588 case SRE_OP_LITERAL:
589 /* repeated literal */
590 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000591 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000592 while (ptr < end && (SRE_CODE) *ptr == chr)
593 ptr++;
594 break;
595
596 case SRE_OP_LITERAL_IGNORE:
597 /* repeated literal */
598 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000599 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000600 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
601 ptr++;
602 break;
603
604 case SRE_OP_NOT_LITERAL:
605 /* repeated non-literal */
606 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000607 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000608 while (ptr < end && (SRE_CODE) *ptr != chr)
609 ptr++;
610 break;
611
612 case SRE_OP_NOT_LITERAL_IGNORE:
613 /* repeated non-literal */
614 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000615 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000616 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
617 ptr++;
618 break;
619
620 case SRE_OP_IN:
621 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000622 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
623 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000624 ptr++;
625 break;
626
627 default:
628 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000629 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000630 while ((SRE_CHAR*) state->ptr < end) {
631 i = SRE_MATCH(state, pattern, level);
632 if (i < 0)
633 return i;
634 if (!i)
635 break;
636 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000637 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
638 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000639 return (SRE_CHAR*) state->ptr - ptr;
640 }
641
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000642 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000643 return ptr - (SRE_CHAR*) state->ptr;
644}
645
Fredrik Lundh33accc12000-08-27 20:59:47 +0000646#if 0 /* not used in this release */
Guido van Rossumb700df92000-03-31 14:59:30 +0000647LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000648SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
649{
650 /* check if an SRE_OP_INFO block matches at the current position.
651 returns the number of SRE_CODE objects to skip if successful, 0
652 if no match */
653
654 SRE_CHAR* end = state->end;
655 SRE_CHAR* ptr = state->ptr;
656 int i;
657
658 /* check minimal length */
659 if (pattern[3] && (end - ptr) < pattern[3])
660 return 0;
661
662 /* check known prefix */
663 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
664 /* <length> <skip> <prefix data> <overlap data> */
665 for (i = 0; i < pattern[5]; i++)
666 if ((SRE_CODE) ptr[i] != pattern[7 + i])
667 return 0;
668 return pattern[0] + 2 * pattern[6];
669 }
670 return pattern[0];
671}
Fredrik Lundh33accc12000-08-27 20:59:47 +0000672#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000673
674LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000675SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000676{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000677 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000679
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000680 SRE_CHAR* end = state->end;
681 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000682 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 SRE_REPEAT* rp;
684 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000685 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000686
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000687 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000688
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000689 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000690
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000691#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000692 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000693 return SRE_ERROR_RECURSION_LIMIT;
694#endif
695
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000696#if defined(USE_RECURSION_LIMIT)
697 if (level > USE_RECURSION_LIMIT)
698 return SRE_ERROR_RECURSION_LIMIT;
699#endif
700
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000701 if (pattern[0] == SRE_OP_INFO) {
702 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000703 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000704 if (pattern[3] && (end - ptr) < pattern[3]) {
705 TRACE(("reject (got %d chars, need %d)\n",
706 (end - ptr), pattern[3]));
707 return 0;
708 }
709 pattern += pattern[1] + 1;
710 }
711
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000712 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000713
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000714 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000715
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000716 case SRE_OP_FAILURE:
717 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000718 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000719 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000720
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000721 case SRE_OP_SUCCESS:
722 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000723 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000725 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 case SRE_OP_AT:
728 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000729 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000730 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000731 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000732 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000733 pattern++;
734 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000735
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 case SRE_OP_CATEGORY:
737 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000738 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000739 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000741 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000743 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000744 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000745
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 case SRE_OP_LITERAL:
747 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000748 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000749 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000750 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000751 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 pattern++;
753 ptr++;
754 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000755
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000756 case SRE_OP_NOT_LITERAL:
757 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000758 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000759 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000760 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000761 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 pattern++;
763 ptr++;
764 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000765
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000766 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000767 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000768 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000769 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000770 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
771 return 0;
772 ptr++;
773 break;
774
775 case SRE_OP_ANY_ALL:
776 /* match anything */
777 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000778 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000779 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000780 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000781 ptr++;
782 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000783
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000784 case SRE_OP_IN:
785 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000786 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000787 TRACE(("|%p|%p|IN\n", pattern, ptr));
788 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000789 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 pattern += pattern[0];
791 ptr++;
792 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000793
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000794 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000795 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000796 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000797 i = pattern[0];
798 {
799 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
800 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
801 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000802 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000803 while (p < e) {
804 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000805 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000806 p++; ptr++;
807 }
808 }
809 pattern++;
810 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000811
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000812 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000813 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000814 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815 i = pattern[0];
816 {
817 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
818 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
819 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000820 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000821 while (p < e) {
822 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000823 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000824 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000825 p++; ptr++;
826 }
827 }
828 pattern++;
829 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000830
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000831 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000832 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000833 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000834 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000835 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000836 pattern++;
837 ptr++;
838 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000839
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000840 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000841 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000842 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000843 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000844 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000845 pattern++;
846 ptr++;
847 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000848
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000849 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000850 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000851 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000852 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000853 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000854 pattern += pattern[0];
855 ptr++;
856 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000857
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000858 case SRE_OP_MARK:
859 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000860 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000861 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000862 i = pattern[0];
863 if (i & 1)
864 state->lastindex = i/2 + 1;
865 if (i > state->lastmark)
866 state->lastmark = i;
867 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000868 pattern++;
869 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000870
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000871 case SRE_OP_JUMP:
872 case SRE_OP_INFO:
873 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000874 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000875 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000876 pattern += pattern[0];
877 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000878
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000879 case SRE_OP_ASSERT:
880 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000881 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000882 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000883 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000884 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000885 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000886 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000887 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000888 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000889 pattern += pattern[0];
890 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000891
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000892 case SRE_OP_ASSERT_NOT:
893 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000894 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000895 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000896 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000897 if (state->ptr >= state->beginning) {
898 i = SRE_MATCH(state, pattern + 2, level + 1);
899 if (i < 0)
900 return i;
901 if (i)
902 return 0;
903 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000904 pattern += pattern[0];
905 break;
906
907 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000908 /* alternation */
909 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000910 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000911 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000912 for (; pattern[0]; pattern += pattern[0]) {
913 if (pattern[1] == SRE_OP_LITERAL &&
914 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
915 continue;
916 if (pattern[1] == SRE_OP_IN &&
917 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
918 continue;
919 state->ptr = ptr;
920 i = SRE_MATCH(state, pattern + 1, level + 1);
921 if (i)
922 return i;
923 if (state->lastmark > lastmark) {
924 memset(
925 state->mark + lastmark + 1, 0,
926 (state->lastmark - lastmark) * sizeof(void*)
927 );
928 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000929 }
930 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000931 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000932
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000933 case SRE_OP_REPEAT_ONE:
934 /* match repeated sequence (maximizing regexp) */
935
936 /* this operator only works if the repeated item is
937 exactly one character wide, and we're not already
938 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000939 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000940
941 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
942
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000943 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000944 pattern[1], pattern[2]));
945
Fredrik Lundhe1869832000-08-01 22:47:49 +0000946 if (ptr + pattern[1] > end)
947 return 0; /* cannot match */
948
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000949 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000950
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000951 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
952 if (count < 0)
953 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000954
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000955 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000956
957 /* when we arrive here, count contains the number of
958 matches, and ptr points to the tail of the target
959 string. check if the rest of the pattern matches,
960 and backtrack if not. */
961
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000962 if (count < (int) pattern[1])
963 return 0;
964
965 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
966 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000967 state->ptr = ptr;
968 return 1;
969
970 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
971 /* tail starts with a literal. skip positions where
972 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000973 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000974 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000975 while (count >= (int) pattern[1] &&
976 (ptr >= end || *ptr != chr)) {
977 ptr--;
978 count--;
979 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000980 if (count < (int) pattern[1])
981 break;
982 state->ptr = ptr;
983 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000984 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000985 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000986 ptr--;
987 count--;
988 }
989
990 } else {
991 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000992 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000993 while (count >= (int) pattern[1]) {
994 state->ptr = ptr;
995 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000996 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000997 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000998 ptr--;
999 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001000 if (state->lastmark > lastmark) {
1001 memset(
1002 state->mark + lastmark + 1, 0,
1003 (state->lastmark - lastmark) * sizeof(void*)
1004 );
1005 state->lastmark = lastmark;
1006 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001007 }
1008 }
1009 return 0;
1010
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001011 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001012 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001013 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001014 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001015 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001016 pattern[1], pattern[2]));
1017
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001018 rep.count = -1;
1019 rep.pattern = pattern;
1020
1021 /* install new repeat context */
1022 rep.prev = state->repeat;
1023 state->repeat = &rep;
1024
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001025 state->ptr = ptr;
1026 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001027
1028 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001029
1030 return i;
1031
1032 case SRE_OP_MAX_UNTIL:
1033 /* maximizing repeat */
1034 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1035
1036 /* FIXME: we probably need to deal with zero-width
1037 matches in here... */
1038
1039 rp = state->repeat;
1040 if (!rp)
1041 return SRE_ERROR_STATE;
1042
1043 state->ptr = ptr;
1044
1045 count = rp->count + 1;
1046
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001047 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001048
1049 if (count < rp->pattern[1]) {
1050 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001051 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001052 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001053 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001054 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001055 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001056 rp->count = count - 1;
1057 state->ptr = ptr;
1058 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001059 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001060
1061 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001062 /* we may have enough matches, but if we can
1063 match another item, do so */
1064 rp->count = count;
1065 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001066 i = mark_save(state, 0, lastmark);
1067 if (i < 0)
1068 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001069 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001070 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001071 if (i)
1072 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001073 i = mark_restore(state, 0, lastmark);
Fredrik Lundh397a6542001-10-18 19:30:16 +00001074 state->lastmark = lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001075 if (i < 0)
1076 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001077 rp->count = count - 1;
1078 state->ptr = ptr;
1079 }
1080
1081 /* cannot match more repeated items here. make sure the
1082 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001083 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001084 i = SRE_MATCH(state, pattern, level + 1);
1085 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001086 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001087 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001088 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001089 return 0;
1090
1091 case SRE_OP_MIN_UNTIL:
1092 /* minimizing repeat */
1093 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1094
1095 rp = state->repeat;
1096 if (!rp)
1097 return SRE_ERROR_STATE;
1098
1099 count = rp->count + 1;
1100
Fredrik Lundh770617b2001-01-14 15:06:11 +00001101 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1102 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001103
1104 state->ptr = ptr;
1105
1106 if (count < rp->pattern[1]) {
1107 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001108 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001109 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001110 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001111 if (i)
1112 return i;
1113 rp->count = count-1;
1114 state->ptr = ptr;
1115 return 0;
1116 }
1117
1118 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001119 state->repeat = rp->prev;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001120 i = SRE_MATCH(state, pattern, level + 1);
1121 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001122 return i;
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001123
Fredrik Lundh770617b2001-01-14 15:06:11 +00001124 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001125 state->repeat = rp;
1126
1127 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1128 return 0;
1129
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001130 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001131 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001132 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001133 if (i)
1134 return i;
1135 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001136 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001137 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001138
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001139 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001140 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001141 return SRE_ERROR_ILLEGAL;
1142 }
1143 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001144
Sjoerd Mullender89dfe9e2001-08-30 14:37:07 +00001145 /* can't end up here */
Fredrik Lundh21009b92001-09-18 18:47:09 +00001146 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
Guido van Rossumb700df92000-03-31 14:59:30 +00001147}
1148
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001149LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001150SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1151{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001152 SRE_CHAR* ptr = state->start;
1153 SRE_CHAR* end = state->end;
1154 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001155 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001156 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001157 SRE_CODE* prefix = NULL;
1158 SRE_CODE* charset = NULL;
1159 SRE_CODE* overlap = NULL;
1160 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001161
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001162 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001163 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001164 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001165
1166 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001167
1168 if (pattern[3] > 0) {
1169 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001170 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001171 end -= pattern[3]-1;
1172 if (end <= ptr)
1173 end = ptr+1;
1174 }
1175
Fredrik Lundh3562f112000-07-02 12:00:07 +00001176 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001177 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001178 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001179 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001180 prefix_skip = pattern[6];
1181 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001182 overlap = prefix + prefix_len - 1;
1183 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001184 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001185 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001186 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001187
1188 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001189 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001190
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001191 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1192 TRACE(("charset = %p\n", charset));
1193
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001194#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001195 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001196 /* pattern starts with a known prefix. use the overlap
1197 table to skip forward as fast as we possibly can */
1198 int i = 0;
1199 end = state->end;
1200 while (ptr < end) {
1201 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001202 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001203 if (!i)
1204 break;
1205 else
1206 i = overlap[i];
1207 } else {
1208 if (++i == prefix_len) {
1209 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001210 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1211 state->start = ptr + 1 - prefix_len;
1212 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001213 if (flags & SRE_INFO_LITERAL)
1214 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001215 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001216 if (status != 0)
1217 return status;
1218 /* close but no cigar -- try again */
1219 i = overlap[i];
1220 }
1221 break;
1222 }
1223
1224 }
1225 ptr++;
1226 }
1227 return 0;
1228 }
1229#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001230
Fredrik Lundh3562f112000-07-02 12:00:07 +00001231 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001232 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001233 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001234 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001235 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001236 for (;;) {
1237 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1238 ptr++;
1239 if (ptr == end)
1240 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001241 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 state->start = ptr;
1243 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001244 if (flags & SRE_INFO_LITERAL)
1245 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001246 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001247 if (status != 0)
1248 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001249 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001250 } else if (charset) {
1251 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001252 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001253 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001254 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001255 ptr++;
1256 if (ptr == end)
1257 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001258 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001259 state->start = ptr;
1260 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001261 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001262 if (status != 0)
1263 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001264 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001265 }
1266 } else
1267 /* general case */
1268 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001269 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001270 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001271 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001272 if (status != 0)
1273 break;
1274 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001275
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001276 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001277}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001278
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001279LOCAL(int)
1280SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1281{
1282 /* check if given string is a literal template (i.e. no escapes) */
1283 while (len-- > 0)
1284 if (*ptr++ == '\\')
1285 return 0;
1286 return 1;
1287}
Guido van Rossumb700df92000-03-31 14:59:30 +00001288
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001289#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001290
1291/* -------------------------------------------------------------------- */
1292/* factories and destructors */
1293
1294/* see sre.h for object declarations */
1295
Jeremy Hylton938ace62002-07-17 16:30:39 +00001296static PyTypeObject Pattern_Type;
1297static PyTypeObject Match_Type;
1298static PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001299
1300static PyObject *
1301_compile(PyObject* self_, PyObject* args)
1302{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001303 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001304
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001305 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001306 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001307
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001308 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001309 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001310 PyObject* code;
1311 int groups = 0;
1312 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001313 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001314 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1315 &PyList_Type, &code, &groups,
1316 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001317 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001318
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001319 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001320
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001321 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001322 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001323 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001324
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001325 self->codesize = n;
1326
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001327 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001328 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001329 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001330 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001331
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001332 if (PyErr_Occurred()) {
1333 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001334 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001335 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001336
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001337 Py_INCREF(pattern);
1338 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001339
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001340 self->flags = flags;
1341
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001342 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001343
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001344 Py_XINCREF(groupindex);
1345 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001346
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001347 Py_XINCREF(indexgroup);
1348 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001349
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001350 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001351}
1352
1353static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001354sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001355{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001356 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001357}
1358
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001359static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001360sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001361{
1362 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001363 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001364 return NULL;
1365 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001366 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001367 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001368#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001369 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001370#else
1371 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001372#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001373 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001374}
1375
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001376LOCAL(void)
1377state_reset(SRE_STATE* state)
1378{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001379 state->lastmark = 0;
1380
1381 /* FIXME: dynamic! */
Neal Norwitz35fc7602002-06-13 21:11:11 +00001382 memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001383
1384 state->lastindex = -1;
1385
1386 state->repeat = NULL;
1387
1388 mark_fini(state);
1389}
1390
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001391static void*
1392getstring(PyObject* string, int* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001393{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001394 /* given a python object, return a data pointer, a length (in
1395 characters), and a character size. return NULL if the object
1396 is not a string (or not compatible) */
1397
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001398 PyBufferProcs *buffer;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001399 int size, bytes, charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001400 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001401
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001402#if defined(HAVE_UNICODE)
1403 if (PyUnicode_Check(string)) {
1404 /* unicode strings doesn't always support the buffer interface */
1405 ptr = (void*) PyUnicode_AS_DATA(string);
1406 bytes = PyUnicode_GET_DATA_SIZE(string);
1407 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001408 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001409
1410 } else {
1411#endif
1412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001413 /* get pointer to string buffer */
1414 buffer = string->ob_type->tp_as_buffer;
1415 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1416 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001417 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001418 return NULL;
1419 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001420
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001421 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001422 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1423 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001424 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1425 return NULL;
1426 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001427
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001428 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001429#if PY_VERSION_HEX >= 0x01060000
1430 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001431#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001432 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001433#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001434
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001435 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001436 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001437#if defined(HAVE_UNICODE)
1438 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001439 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001440#endif
1441 else {
1442 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1443 return NULL;
1444 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001445
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001446#if defined(HAVE_UNICODE)
1447 }
1448#endif
1449
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001450 *p_length = size;
1451 *p_charsize = charsize;
1452
1453 return ptr;
1454}
1455
1456LOCAL(PyObject*)
1457state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1458 int start, int end)
1459{
1460 /* prepare state object */
1461
1462 int length;
1463 int charsize;
1464 void* ptr;
1465
1466 memset(state, 0, sizeof(SRE_STATE));
1467
1468 state->lastindex = -1;
1469
1470 ptr = getstring(string, &length, &charsize);
1471 if (!ptr)
1472 return NULL;
1473
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001474 /* adjust boundaries */
1475 if (start < 0)
1476 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001477 else if (start > length)
1478 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001479
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001480 if (end < 0)
1481 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001482 else if (end > length)
1483 end = length;
1484
1485 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001486
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001487 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001488
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001489 state->start = (void*) ((char*) ptr + start * state->charsize);
1490 state->end = (void*) ((char*) ptr + end * state->charsize);
1491
1492 Py_INCREF(string);
1493 state->string = string;
1494 state->pos = start;
1495 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001496
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001497 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001498 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001499 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001500#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001501 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001502#else
1503 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001504#endif
1505 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001506 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001507
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001508 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001509}
1510
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001511LOCAL(void)
1512state_fini(SRE_STATE* state)
1513{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001514 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001515 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001516}
1517
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001518/* calculate offset from start of string */
1519#define STATE_OFFSET(state, member)\
1520 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1521
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001522LOCAL(PyObject*)
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001523state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001524{
Fredrik Lundh58100642000-08-09 09:14:35 +00001525 int i, j;
1526
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001527 index = (index - 1) * 2;
1528
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001529 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001530 if (empty)
1531 /* want empty string */
1532 i = j = 0;
1533 else {
1534 Py_INCREF(Py_None);
1535 return Py_None;
1536 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001537 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001538 i = STATE_OFFSET(state, state->mark[index]);
1539 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001540 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001541
Fredrik Lundh58100642000-08-09 09:14:35 +00001542 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001543}
1544
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001545static void
1546pattern_error(int status)
1547{
1548 switch (status) {
1549 case SRE_ERROR_RECURSION_LIMIT:
1550 PyErr_SetString(
1551 PyExc_RuntimeError,
1552 "maximum recursion limit exceeded"
1553 );
1554 break;
1555 case SRE_ERROR_MEMORY:
1556 PyErr_NoMemory();
1557 break;
1558 default:
1559 /* other error codes indicate compiler/engine bugs */
1560 PyErr_SetString(
1561 PyExc_RuntimeError,
1562 "internal error in regular expression engine"
1563 );
1564 }
1565}
1566
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001567static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001568pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001569{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001570 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001571
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 MatchObject* match;
1573 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001574 char* base;
1575 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001576
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001577 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001578
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001579 /* create match object (with room for extra group marks) */
1580 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001581 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001582 if (!match)
1583 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001585 Py_INCREF(pattern);
1586 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001587
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001588 Py_INCREF(state->string);
1589 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001590
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001591 match->regs = NULL;
1592 match->groups = pattern->groups+1;
1593
1594 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001595
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001596 base = (char*) state->beginning;
1597 n = state->charsize;
1598
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001599 match->mark[0] = ((char*) state->start - base) / n;
1600 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001602 for (i = j = 0; i < pattern->groups; i++, j+=2)
1603 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1604 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1605 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1606 } else
1607 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1608
1609 match->pos = state->pos;
1610 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001611
Fredrik Lundh6f013982000-07-03 18:44:21 +00001612 match->lastindex = state->lastindex;
1613
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001614 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001615
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001616 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001617
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001618 /* no match */
1619 Py_INCREF(Py_None);
1620 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001621
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001622 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001623
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001624 /* internal error */
1625 pattern_error(status);
1626 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001627}
1628
1629static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001630pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001631{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001632 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001633
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001634 ScannerObject* self;
1635
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001636 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001637 int start = 0;
1638 int end = INT_MAX;
1639 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1640 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001641
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001642 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001643 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001644 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001645 return NULL;
1646
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001647 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001648 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001649 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001650 return NULL;
1651 }
1652
1653 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001654 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001655
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001656 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001657}
1658
Guido van Rossumb700df92000-03-31 14:59:30 +00001659static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001660pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001661{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001662 Py_XDECREF(self->pattern);
1663 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001664 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001665 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001666}
1667
1668static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001669pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001670{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001671 SRE_STATE state;
1672 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001673
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001674 PyObject* string;
1675 int start = 0;
1676 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001677 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1678 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1679 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001680 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001681
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001682 string = state_init(&state, self, string, start, end);
1683 if (!string)
1684 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001685
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001686 state.ptr = state.start;
1687
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001688 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1689
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001690 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001691 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001692 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001693#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001694 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001695#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001696 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001697
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001698 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1699
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001700 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001701
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001703}
1704
1705static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001706pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001707{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001708 SRE_STATE state;
1709 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001710
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001711 PyObject* string;
1712 int start = 0;
1713 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001714 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1715 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1716 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001717 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001718
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001719 string = state_init(&state, self, string, start, end);
1720 if (!string)
1721 return NULL;
1722
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001723 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1724
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001725 if (state.charsize == 1) {
1726 status = sre_search(&state, PatternObject_GetCode(self));
1727 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001728#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001729 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001730#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001731 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001732
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001733 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1734
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001735 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001736
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001737 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001738}
1739
1740static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001741call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001742{
1743 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001744 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001745 PyObject* func;
1746 PyObject* result;
1747
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001748 if (!args)
1749 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001750 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001751 if (!name)
1752 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001753 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001754 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001755 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001756 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001757 func = PyObject_GetAttrString(mod, function);
1758 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001759 if (!func)
1760 return NULL;
1761 result = PyObject_CallObject(func, args);
1762 Py_DECREF(func);
1763 Py_DECREF(args);
1764 return result;
1765}
1766
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object with the result of copy.deepcopy(*object, memo).
       returns 1 on success.  on failure returns 0 and leaves *object
       untouched */

    PyObject* arguments;
    PyObject* duplicate;

    /* call() copes with a NULL argument tuple and releases it */
    arguments = Py_BuildValue("OO", *object, memo);
    duplicate = call("copy", "deepcopy", arguments);

    if (duplicate) {
        /* drop the old object and hand the copy to the caller */
        Py_DECREF(*object);
        *object = duplicate;
        return 1; /* success */
    }

    return 0;
}
#endif
1786
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001787static PyObject*
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00001788join_list(PyObject* list, PyObject* pattern)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001789{
1790 /* join list elements */
1791
1792 PyObject* joiner;
1793#if PY_VERSION_HEX >= 0x01060000
1794 PyObject* function;
1795 PyObject* args;
1796#endif
1797 PyObject* result;
1798
1799 switch (PyList_GET_SIZE(list)) {
1800 case 0:
1801 Py_DECREF(list);
1802 return PyString_FromString("");
1803 case 1:
1804 result = PyList_GET_ITEM(list, 0);
1805 Py_INCREF(result);
1806 Py_DECREF(list);
1807 return result;
1808 }
1809
1810 /* two or more elements: slice out a suitable separator from the
1811 first member, and use that to join the entire list */
1812
1813 joiner = PySequence_GetSlice(pattern, 0, 0);
1814 if (!joiner)
1815 return NULL;
1816
1817#if PY_VERSION_HEX >= 0x01060000
1818 function = PyObject_GetAttrString(joiner, "join");
1819 if (!function) {
1820 Py_DECREF(joiner);
1821 return NULL;
1822 }
1823 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001824 if (!args) {
1825 Py_DECREF(function);
1826 Py_DECREF(joiner);
1827 return NULL;
1828 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001829 PyTuple_SET_ITEM(args, 0, list);
1830 result = PyObject_CallObject(function, args);
1831 Py_DECREF(args); /* also removes list */
1832 Py_DECREF(function);
1833#else
1834 result = call(
1835 "string", "join",
1836 Py_BuildValue("OO", list, joiner)
1837 );
1838#endif
1839 Py_DECREF(joiner);
1840
1841 return result;
1842}
1843
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001844static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001845pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001846{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001847 SRE_STATE state;
1848 PyObject* list;
1849 int status;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001850 int i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00001851
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001852 PyObject* string;
1853 int start = 0;
1854 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001855 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1856 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1857 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001858 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001859
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001860 string = state_init(&state, self, string, start, end);
1861 if (!string)
1862 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001863
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001864 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001865 if (!list) {
1866 state_fini(&state);
1867 return NULL;
1868 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001869
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001870 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001871
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 PyObject* item;
1873
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001874 state_reset(&state);
1875
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001876 state.ptr = state.start;
1877
1878 if (state.charsize == 1) {
1879 status = sre_search(&state, PatternObject_GetCode(self));
1880 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001881#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001882 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001883#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001884 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001885
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001886 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001887 if (status == 0)
1888 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001889 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001890 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001891 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001892
1893 /* don't bother to build a match object */
1894 switch (self->groups) {
1895 case 0:
1896 b = STATE_OFFSET(&state, state.start);
1897 e = STATE_OFFSET(&state, state.ptr);
1898 item = PySequence_GetSlice(string, b, e);
1899 if (!item)
1900 goto error;
1901 break;
1902 case 1:
1903 item = state_getslice(&state, 1, string, 1);
1904 if (!item)
1905 goto error;
1906 break;
1907 default:
1908 item = PyTuple_New(self->groups);
1909 if (!item)
1910 goto error;
1911 for (i = 0; i < self->groups; i++) {
1912 PyObject* o = state_getslice(&state, i+1, string, 1);
1913 if (!o) {
1914 Py_DECREF(item);
1915 goto error;
1916 }
1917 PyTuple_SET_ITEM(item, i, o);
1918 }
1919 break;
1920 }
1921
1922 status = PyList_Append(list, item);
1923 Py_DECREF(item);
1924 if (status < 0)
1925 goto error;
1926
1927 if (state.ptr == state.start)
1928 state.start = (void*) ((char*) state.ptr + state.charsize);
1929 else
1930 state.start = state.ptr;
1931
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001932 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 state_fini(&state);
1935 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001936
1937error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001938 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001939 state_fini(&state);
1940 return NULL;
1941
Guido van Rossumb700df92000-03-31 14:59:30 +00001942}
1943
#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    /* return a callable-iterator built from the "search" method of a
       fresh scanner object, with None as the sentinel value.  'args'
       is handed to pattern_scanner unchanged */

    PyObject* scanner;
    PyObject* bound_search;
    PyObject* result = NULL;

    scanner = pattern_scanner(pattern, args);
    if (!scanner)
        return NULL;

    /* the bound method holds on to the scanner from here on */
    bound_search = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);

    if (bound_search) {
        result = PyCallIter_New(bound_search, Py_None);
        Py_DECREF(bound_search);
    }

    return result;
}
#endif
1967
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001968static PyObject*
1969pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
1970{
1971 SRE_STATE state;
1972 PyObject* list;
1973 PyObject* item;
1974 int status;
1975 int n;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001976 int i;
1977 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001978
1979 PyObject* string;
1980 int maxsplit = 0;
1981 static char* kwlist[] = { "source", "maxsplit", NULL };
1982 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
1983 &string, &maxsplit))
1984 return NULL;
1985
1986 string = state_init(&state, self, string, 0, INT_MAX);
1987 if (!string)
1988 return NULL;
1989
1990 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001991 if (!list) {
1992 state_fini(&state);
1993 return NULL;
1994 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001995
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001996 n = 0;
1997 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001998
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001999 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002000
2001 state_reset(&state);
2002
2003 state.ptr = state.start;
2004
2005 if (state.charsize == 1) {
2006 status = sre_search(&state, PatternObject_GetCode(self));
2007 } else {
2008#if defined(HAVE_UNICODE)
2009 status = sre_usearch(&state, PatternObject_GetCode(self));
2010#endif
2011 }
2012
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002013 if (status <= 0) {
2014 if (status == 0)
2015 break;
2016 pattern_error(status);
2017 goto error;
2018 }
2019
2020 if (state.start == state.ptr) {
2021 if (last == state.end)
2022 break;
2023 /* skip one character */
2024 state.start = (void*) ((char*) state.ptr + state.charsize);
2025 continue;
2026 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002027
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002028 /* get segment before this match */
2029 item = PySequence_GetSlice(
2030 string, STATE_OFFSET(&state, last),
2031 STATE_OFFSET(&state, state.start)
2032 );
2033 if (!item)
2034 goto error;
2035 status = PyList_Append(list, item);
2036 Py_DECREF(item);
2037 if (status < 0)
2038 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002039
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002040 /* add groups (if any) */
2041 for (i = 0; i < self->groups; i++) {
2042 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002043 if (!item)
2044 goto error;
2045 status = PyList_Append(list, item);
2046 Py_DECREF(item);
2047 if (status < 0)
2048 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002049 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002050
2051 n = n + 1;
2052
2053 last = state.start = state.ptr;
2054
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002055 }
2056
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002057 /* get segment following last match (even if empty) */
2058 item = PySequence_GetSlice(
2059 string, STATE_OFFSET(&state, last), state.endpos
2060 );
2061 if (!item)
2062 goto error;
2063 status = PyList_Append(list, item);
2064 Py_DECREF(item);
2065 if (status < 0)
2066 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002067
2068 state_fini(&state);
2069 return list;
2070
2071error:
2072 Py_DECREF(list);
2073 state_fini(&state);
2074 return NULL;
2075
2076}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002077
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002078static PyObject*
2079pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2080 int count, int subn)
2081{
2082 SRE_STATE state;
2083 PyObject* list;
2084 PyObject* item;
2085 PyObject* filter;
2086 PyObject* args;
2087 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002088 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002089 int status;
2090 int n;
2091 int i, b, e;
2092 int filter_is_callable;
2093
Fredrik Lundhdac58492001-10-21 21:48:30 +00002094 if (PyCallable_Check(template)) {
2095 /* sub/subn takes either a function or a template */
2096 filter = template;
2097 Py_INCREF(filter);
2098 filter_is_callable = 1;
2099 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002100 /* if not callable, check if it's a literal string */
2101 int literal;
2102 ptr = getstring(template, &n, &b);
2103 if (ptr) {
2104 if (b == 1) {
2105 literal = sre_literal_template(ptr, n);
2106 } else {
2107#if defined(HAVE_UNICODE)
2108 literal = sre_uliteral_template(ptr, n);
2109#endif
2110 }
2111 } else {
2112 PyErr_Clear();
2113 literal = 0;
2114 }
2115 if (literal) {
2116 filter = template;
2117 Py_INCREF(filter);
2118 filter_is_callable = 0;
2119 } else {
2120 /* not a literal; hand it over to the template compiler */
2121 filter = call(
2122 SRE_MODULE, "_subx",
2123 Py_BuildValue("OO", self, template)
2124 );
2125 if (!filter)
2126 return NULL;
2127 filter_is_callable = PyCallable_Check(filter);
2128 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002129 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002130
2131 string = state_init(&state, self, string, 0, INT_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002132 if (!string) {
2133 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002134 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002135 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002136
2137 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002138 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002139 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002140 state_fini(&state);
2141 return NULL;
2142 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002143
2144 n = i = 0;
2145
2146 while (!count || n < count) {
2147
2148 state_reset(&state);
2149
2150 state.ptr = state.start;
2151
2152 if (state.charsize == 1) {
2153 status = sre_search(&state, PatternObject_GetCode(self));
2154 } else {
2155#if defined(HAVE_UNICODE)
2156 status = sre_usearch(&state, PatternObject_GetCode(self));
2157#endif
2158 }
2159
2160 if (status <= 0) {
2161 if (status == 0)
2162 break;
2163 pattern_error(status);
2164 goto error;
2165 }
2166
2167 b = STATE_OFFSET(&state, state.start);
2168 e = STATE_OFFSET(&state, state.ptr);
2169
2170 if (i < b) {
2171 /* get segment before this match */
2172 item = PySequence_GetSlice(string, i, b);
2173 if (!item)
2174 goto error;
2175 status = PyList_Append(list, item);
2176 Py_DECREF(item);
2177 if (status < 0)
2178 goto error;
2179
2180 } else if (i == b && i == e && n > 0)
2181 /* ignore empty match on latest position */
2182 goto next;
2183
2184 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002185 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002186 match = pattern_new_match(self, &state, 1);
2187 if (!match)
2188 goto error;
2189 args = Py_BuildValue("(O)", match);
2190 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002191 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002192 goto error;
2193 }
2194 item = PyObject_CallObject(filter, args);
2195 Py_DECREF(args);
2196 Py_DECREF(match);
2197 if (!item)
2198 goto error;
2199 } else {
2200 /* filter is literal string */
2201 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002202 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002203 }
2204
2205 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002206 if (item != Py_None) {
2207 status = PyList_Append(list, item);
2208 Py_DECREF(item);
2209 if (status < 0)
2210 goto error;
2211 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002212
2213 i = e;
2214 n = n + 1;
2215
2216next:
2217 /* move on */
2218 if (state.ptr == state.start)
2219 state.start = (void*) ((char*) state.ptr + state.charsize);
2220 else
2221 state.start = state.ptr;
2222
2223 }
2224
2225 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002226 if (i < state.endpos) {
2227 item = PySequence_GetSlice(string, i, state.endpos);
2228 if (!item)
2229 goto error;
2230 status = PyList_Append(list, item);
2231 Py_DECREF(item);
2232 if (status < 0)
2233 goto error;
2234 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002235
2236 state_fini(&state);
2237
Guido van Rossum4e173842001-12-07 04:25:10 +00002238 Py_DECREF(filter);
2239
Fredrik Lundhdac58492001-10-21 21:48:30 +00002240 /* convert list to single string (also removes list) */
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00002241 item = join_list(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002242
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002243 if (!item)
2244 return NULL;
2245
2246 if (subn)
2247 return Py_BuildValue("Ni", item, n);
2248
2249 return item;
2250
2251error:
2252 Py_DECREF(list);
2253 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002254 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002255 return NULL;
2256
2257}
2258
2259static PyObject*
2260pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2261{
2262 PyObject* template;
2263 PyObject* string;
2264 int count = 0;
2265 static char* kwlist[] = { "repl", "string", "count", NULL };
2266 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2267 &template, &string, &count))
2268 return NULL;
2269
2270 return pattern_subx(self, template, string, count, 0);
2271}
2272
2273static PyObject*
2274pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2275{
2276 PyObject* template;
2277 PyObject* string;
2278 int count = 0;
2279 static char* kwlist[] = { "repl", "string", "count", NULL };
2280 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2281 &template, &string, &count))
2282 return NULL;
2283
2284 return pattern_subx(self, template, string, count, 1);
2285}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002286
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002287static PyObject*
2288pattern_copy(PatternObject* self, PyObject* args)
2289{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002290#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002291 PatternObject* copy;
2292 int offset;
2293
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002294 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2295 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002296
2297 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2298 if (!copy)
2299 return NULL;
2300
2301 offset = offsetof(PatternObject, groups);
2302
2303 Py_XINCREF(self->groupindex);
2304 Py_XINCREF(self->indexgroup);
2305 Py_XINCREF(self->pattern);
2306
2307 memcpy((char*) copy + offset, (char*) self + offset,
2308 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2309
2310 return (PyObject*) copy;
2311#else
2312 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2313 return NULL;
2314#endif
2315}
2316
2317static PyObject*
2318pattern_deepcopy(PatternObject* self, PyObject* args)
2319{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002320#ifdef USE_BUILTIN_COPY
2321 PatternObject* copy;
2322
2323 PyObject* memo;
2324 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2325 return NULL;
2326
2327 copy = (PatternObject*) pattern_copy(self, Py_None);
2328 if (!copy)
2329 return NULL;
2330
2331 if (!deepcopy(&copy->groupindex, memo) ||
2332 !deepcopy(&copy->indexgroup, memo) ||
2333 !deepcopy(&copy->pattern, memo)) {
2334 Py_DECREF(copy);
2335 return NULL;
2336 }
2337
2338#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002339 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2340 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002341#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002342}
2343
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002344static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002345 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2346 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2347 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2348 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2349 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2350 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002351#if PY_VERSION_HEX >= 0x02020000
2352 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS},
2353#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002354 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002355 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2356 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002357 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002358};
2359
2360static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002361pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002362{
2363 PyObject* res;
2364
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002365 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002366
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002367 if (res)
2368 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002369
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002370 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002371
2372 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002373 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002374 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002375 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002376 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002377
2378 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002379 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002380
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002381 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002382 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002383
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002384 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002385 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002386 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002387 }
2388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002389 PyErr_SetString(PyExc_AttributeError, name);
2390 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002391}
2392
2393statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002394 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002395 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002396 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002397 (destructor)pattern_dealloc, /*tp_dealloc*/
2398 0, /*tp_print*/
2399 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002400};
2401
2402/* -------------------------------------------------------------------- */
2403/* match methods */
2404
2405static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002406match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002407{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002408 Py_XDECREF(self->regs);
2409 Py_XDECREF(self->string);
2410 Py_DECREF(self->pattern);
2411 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002412}
2413
2414static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002415match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002416{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002417 if (index < 0 || index >= self->groups) {
2418 /* raise IndexError if we were given a bad group number */
2419 PyErr_SetString(
2420 PyExc_IndexError,
2421 "no such group"
2422 );
2423 return NULL;
2424 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002425
Fredrik Lundh6f013982000-07-03 18:44:21 +00002426 index *= 2;
2427
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002428 if (self->string == Py_None || self->mark[index] < 0) {
2429 /* return default value if the string or group is undefined */
2430 Py_INCREF(def);
2431 return def;
2432 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002433
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002434 return PySequence_GetSlice(
2435 self->string, self->mark[index], self->mark[index+1]
2436 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002437}
2438
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002439static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002440match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002441{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002442 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002443
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002444 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002445 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002446
Fredrik Lundh6f013982000-07-03 18:44:21 +00002447 i = -1;
2448
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002449 if (self->pattern->groupindex) {
2450 index = PyObject_GetItem(self->pattern->groupindex, index);
2451 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002452 if (PyInt_Check(index))
2453 i = (int) PyInt_AS_LONG(index);
2454 Py_DECREF(index);
2455 } else
2456 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002457 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002458
2459 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002460}
2461
2462static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002463match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002464{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002465 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002466}
2467
2468static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002469match_expand(MatchObject* self, PyObject* args)
2470{
2471 PyObject* template;
2472 if (!PyArg_ParseTuple(args, "O:expand", &template))
2473 return NULL;
2474
2475 /* delegate to Python code */
2476 return call(
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002477 SRE_MODULE, "_expand",
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002478 Py_BuildValue("OOO", self->pattern, self, template)
2479 );
2480}
2481
2482static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002483match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002484{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002485 PyObject* result;
2486 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002487
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002488 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002489
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002490 switch (size) {
2491 case 0:
2492 result = match_getslice(self, Py_False, Py_None);
2493 break;
2494 case 1:
2495 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2496 break;
2497 default:
2498 /* fetch multiple items */
2499 result = PyTuple_New(size);
2500 if (!result)
2501 return NULL;
2502 for (i = 0; i < size; i++) {
2503 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002504 self, PyTuple_GET_ITEM(args, i), Py_None
2505 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002506 if (!item) {
2507 Py_DECREF(result);
2508 return NULL;
2509 }
2510 PyTuple_SET_ITEM(result, i, item);
2511 }
2512 break;
2513 }
2514 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002515}
2516
2517static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002518match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002519{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002520 PyObject* result;
2521 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002522
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002523 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002524 static char* kwlist[] = { "default", NULL };
2525 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002526 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002527
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002528 result = PyTuple_New(self->groups-1);
2529 if (!result)
2530 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002531
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002532 for (index = 1; index < self->groups; index++) {
2533 PyObject* item;
2534 item = match_getslice_by_index(self, index, def);
2535 if (!item) {
2536 Py_DECREF(result);
2537 return NULL;
2538 }
2539 PyTuple_SET_ITEM(result, index-1, item);
2540 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002541
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002542 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002543}
2544
2545static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002546match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002547{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002548 PyObject* result;
2549 PyObject* keys;
2550 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002551
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002552 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002553 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002554 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002555 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002556
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002557 result = PyDict_New();
2558 if (!result || !self->pattern->groupindex)
2559 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002560
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002561 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002562 if (!keys)
2563 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002564
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002565 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002566 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002567 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002568 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002569 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002570 if (!key)
2571 goto failed;
2572 value = match_getslice(self, key, def);
2573 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002574 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002575 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002576 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002577 status = PyDict_SetItem(result, key, value);
2578 Py_DECREF(value);
2579 if (status < 0)
2580 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002581 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002582
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002583 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002585 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002586
2587failed:
2588 Py_DECREF(keys);
2589 Py_DECREF(result);
2590 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002591}
2592
2593static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002594match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002595{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002596 int index;
2597
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002598 PyObject* index_ = Py_False; /* zero */
2599 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2600 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002601
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002602 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002603
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002604 if (index < 0 || index >= self->groups) {
2605 PyErr_SetString(
2606 PyExc_IndexError,
2607 "no such group"
2608 );
2609 return NULL;
2610 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002611
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002612 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002613 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002614}
2615
2616static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002617match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002618{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002619 int index;
2620
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002621 PyObject* index_ = Py_False; /* zero */
2622 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2623 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002624
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002625 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002626
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002627 if (index < 0 || index >= self->groups) {
2628 PyErr_SetString(
2629 PyExc_IndexError,
2630 "no such group"
2631 );
2632 return NULL;
2633 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002634
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002635 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002636 return Py_BuildValue("i", self->mark[index*2+1]);
2637}
2638
2639LOCAL(PyObject*)
2640_pair(int i1, int i2)
2641{
2642 PyObject* pair;
2643 PyObject* item;
2644
2645 pair = PyTuple_New(2);
2646 if (!pair)
2647 return NULL;
2648
2649 item = PyInt_FromLong(i1);
2650 if (!item)
2651 goto error;
2652 PyTuple_SET_ITEM(pair, 0, item);
2653
2654 item = PyInt_FromLong(i2);
2655 if (!item)
2656 goto error;
2657 PyTuple_SET_ITEM(pair, 1, item);
2658
2659 return pair;
2660
2661 error:
2662 Py_DECREF(pair);
2663 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002664}
2665
2666static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002667match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002668{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002669 int index;
2670
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002671 PyObject* index_ = Py_False; /* zero */
2672 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2673 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002674
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002675 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002676
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002677 if (index < 0 || index >= self->groups) {
2678 PyErr_SetString(
2679 PyExc_IndexError,
2680 "no such group"
2681 );
2682 return NULL;
2683 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002684
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002685 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002686 return _pair(self->mark[index*2], self->mark[index*2+1]);
2687}
2688
2689static PyObject*
2690match_regs(MatchObject* self)
2691{
2692 PyObject* regs;
2693 PyObject* item;
2694 int index;
2695
2696 regs = PyTuple_New(self->groups);
2697 if (!regs)
2698 return NULL;
2699
2700 for (index = 0; index < self->groups; index++) {
2701 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2702 if (!item) {
2703 Py_DECREF(regs);
2704 return NULL;
2705 }
2706 PyTuple_SET_ITEM(regs, index, item);
2707 }
2708
2709 Py_INCREF(regs);
2710 self->regs = regs;
2711
2712 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002713}
2714
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002715static PyObject*
2716match_copy(MatchObject* self, PyObject* args)
2717{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002718#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002719 MatchObject* copy;
2720 int slots, offset;
2721
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002722 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2723 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002724
2725 slots = 2 * (self->pattern->groups+1);
2726
2727 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2728 if (!copy)
2729 return NULL;
2730
2731 /* this value a constant, but any compiler should be able to
2732 figure that out all by itself */
2733 offset = offsetof(MatchObject, string);
2734
2735 Py_XINCREF(self->pattern);
2736 Py_XINCREF(self->string);
2737 Py_XINCREF(self->regs);
2738
2739 memcpy((char*) copy + offset, (char*) self + offset,
2740 sizeof(MatchObject) + slots * sizeof(int) - offset);
2741
2742 return (PyObject*) copy;
2743#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002744 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002745 return NULL;
2746#endif
2747}
2748
2749static PyObject*
2750match_deepcopy(MatchObject* self, PyObject* args)
2751{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002752#ifdef USE_BUILTIN_COPY
2753 MatchObject* copy;
2754
2755 PyObject* memo;
2756 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2757 return NULL;
2758
2759 copy = (MatchObject*) match_copy(self, Py_None);
2760 if (!copy)
2761 return NULL;
2762
2763 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2764 !deepcopy(&copy->string, memo) ||
2765 !deepcopy(&copy->regs, memo)) {
2766 Py_DECREF(copy);
2767 return NULL;
2768 }
2769
2770#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002771 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2772 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002773#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002774}
2775
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002776static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002777 {"group", (PyCFunction) match_group, METH_VARARGS},
2778 {"start", (PyCFunction) match_start, METH_VARARGS},
2779 {"end", (PyCFunction) match_end, METH_VARARGS},
2780 {"span", (PyCFunction) match_span, METH_VARARGS},
2781 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2782 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2783 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002784 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2785 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002786 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002787};
2788
2789static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002790match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002791{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002792 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002793
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002794 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2795 if (res)
2796 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002798 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002800 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002801 if (self->lastindex >= 0)
2802 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002803 Py_INCREF(Py_None);
2804 return Py_None;
2805 }
2806
2807 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002808 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002809 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002810 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002811 );
2812 if (result)
2813 return result;
2814 PyErr_Clear();
2815 }
2816 Py_INCREF(Py_None);
2817 return Py_None;
2818 }
2819
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002820 if (!strcmp(name, "string")) {
2821 if (self->string) {
2822 Py_INCREF(self->string);
2823 return self->string;
2824 } else {
2825 Py_INCREF(Py_None);
2826 return Py_None;
2827 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002828 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002829
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002830 if (!strcmp(name, "regs")) {
2831 if (self->regs) {
2832 Py_INCREF(self->regs);
2833 return self->regs;
2834 } else
2835 return match_regs(self);
2836 }
2837
2838 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002839 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002840 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002841 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002842
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002843 if (!strcmp(name, "pos"))
2844 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002845
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002846 if (!strcmp(name, "endpos"))
2847 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002848
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002849 PyErr_SetString(PyExc_AttributeError, name);
2850 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002851}
2852
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2855
2856statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002857 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002858 0, "_" SRE_MODULE ".SRE_Match",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002859 sizeof(MatchObject), sizeof(int),
2860 (destructor)match_dealloc, /*tp_dealloc*/
2861 0, /*tp_print*/
2862 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002863};
2864
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002865/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002866/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002867
2868static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002869scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002870{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002871 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002872 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002873 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002874}
2875
2876static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002877scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002878{
2879 SRE_STATE* state = &self->state;
2880 PyObject* match;
2881 int status;
2882
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002883 state_reset(state);
2884
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002885 state->ptr = state->start;
2886
2887 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002888 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002889 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002890#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002891 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002892#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002893 }
2894
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002895 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002896 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002897
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002898 if (status == 0 || state->ptr == state->start)
2899 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002900 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002901 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002902
2903 return match;
2904}
2905
2906
2907static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002908scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002909{
2910 SRE_STATE* state = &self->state;
2911 PyObject* match;
2912 int status;
2913
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002914 state_reset(state);
2915
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002916 state->ptr = state->start;
2917
2918 if (state->charsize == 1) {
2919 status = sre_search(state, PatternObject_GetCode(self->pattern));
2920 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002921#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002922 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002923#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002924 }
2925
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002926 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002927 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002928
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002929 if (status == 0 || state->ptr == state->start)
2930 state->start = (void*) ((char*) state->ptr + state->charsize);
2931 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002932 state->start = state->ptr;
2933
2934 return match;
2935}
2936
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002937static PyMethodDef scanner_methods[] = {
Neal Norwitzbb2769f2002-03-31 15:46:00 +00002938 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
2939 /* METH_OLDARGS is not in Python 1.5.2 */
2940 {"match", (PyCFunction) scanner_match, 0},
2941 {"search", (PyCFunction) scanner_search, 0},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002942 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002943};
2944
2945static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002946scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002947{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002948 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002949
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002950 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2951 if (res)
2952 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002953
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002954 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002955
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002956 /* attributes */
2957 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002958 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002959 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002960 }
2961
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002962 PyErr_SetString(PyExc_AttributeError, name);
2963 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002964}
2965
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002966statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002967 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002968 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002969 sizeof(ScannerObject), 0,
2970 (destructor)scanner_dealloc, /*tp_dealloc*/
2971 0, /*tp_print*/
2972 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002973};
2974
Guido van Rossumb700df92000-03-31 14:59:30 +00002975static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002976 {"compile", _compile, METH_VARARGS},
2977 {"getcodesize", sre_codesize, METH_VARARGS},
2978 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002979 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002980};
2981
Mark Hammond8235ea12002-07-19 06:55:41 +00002982PyMODINIT_FUNC init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002983{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002984 PyObject* m;
2985 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002986 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002987
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002988 /* Patch object types */
2989 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002990 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002991
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002992 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002993 d = PyModule_GetDict(m);
2994
Fredrik Lundh21009b92001-09-18 18:47:09 +00002995 x = PyInt_FromLong(SRE_MAGIC);
2996 if (x) {
2997 PyDict_SetItemString(d, "MAGIC", x);
2998 Py_DECREF(x);
2999 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003000
Fredrik Lundh21009b92001-09-18 18:47:09 +00003001 x = PyString_FromString(copyright);
3002 if (x) {
3003 PyDict_SetItemString(d, "copyright", x);
3004 Py_DECREF(x);
3005 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003006}
3007
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003008#endif /* !defined(SRE_RECURSIVE) */