blob: 308b7260b57f96fa7c4e3fd93e02f03849cc397d [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of
 * 2000-06-30 fl  added fast search optimization
 * 2000-06-30 fl  added assert (lookahead) primitives, etc
 * 2000-07-02 fl  added charset optimizations, etc
 * 2000-07-03 fl  store code in pattern object, lookbehind, etc
 * 2000-07-08 fl  added regs attribute
 * 2000-07-21 fl  reset lastindex in scanner methods
 * 2000-08-01 fl  fixes for 1.6b1
 * 2000-08-03 fl  added recursion limit
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-08-08 fl  changed findall to return empty strings instead of None
 * 2000-08-27 fl  properly propagate memory errors
 * 2000-09-02 fl  return -1 instead of None for start/end/span
 * 2000-09-20 fl  added expand method
 * 2000-09-21 fl  don't use the buffer interface for unicode strings
 * 2000-10-03 fl  fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl  really fixed assert_not; reset groups in findall
 * 2000-12-21 fl  fixed memory leak in groupdict
 * 2001-01-02 fl  properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl  avoid recursion for MIN_UNTIL; fixed uppercase literal bug
 * 2001-01-16 fl  fixed memory leak in pattern destructor
 * 2001-03-20 fl  lots of fixes for 2.1b2
 * 2001-04-15 fl  export copyright as Python attribute, not global
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
42
43#ifndef SRE_RECURSIVE
44
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000045static char copyright[] =
46 " SRE 2.1b2 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000047
48#include "Python.h"
49
50#include "sre.h"
51
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000052#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000053
Fredrik Lundh436c3d52000-06-29 08:58:44 +000054/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000055#if !defined(SRE_MODULE)
56#define SRE_MODULE "sre"
57#endif
Fredrik Lundh436c3d52000-06-29 08:58:44 +000058
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh436c3d52000-06-29 08:58:44 +000062#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000063/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +000064#define HAVE_UNICODE
65#endif
66
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000067/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000068/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000069
Fredrik Lundh33accc12000-08-27 20:59:47 +000070/* prevent run-away recursion (bad patterns on long strings) */
71
#ifndef USE_STACKCHECK
/* no stack checking requested: bound the matcher's recursion depth
   with a fixed level limit instead */
#if defined(_LP64) || defined(__LP64__) || defined(MS_WIN64)
/* a number of 64-bit platforms need a smaller limit:
   Monterey (64-bit AIX) (_LP64), Linux64 (__LP64__), Win64 (MS_WIN64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif /* !USE_STACKCHECK */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000082
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000083/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000084#define USE_FAST_SEARCH
85
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000086/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000087#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000088
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000089#if PY_VERSION_HEX < 0x01060000
90#define PyObject_DEL(op) PyMem_DEL((op))
91#endif
92
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000093/* -------------------------------------------------------------------- */
94
Fredrik Lundh80946112000-06-29 18:03:25 +000095#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000096#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000097#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000098/* fastest possible local call under MSVC */
99#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000100#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000101#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000102#else
103#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +0000104#endif
105
/* error codes (all negative, so they never collide with the 0/1
   failure/success results of the matcher) */
#define SRE_ERROR_ILLEGAL           (-1) /* illegal opcode */
#define SRE_ERROR_STATE             (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT   (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY            (-9) /* out of memory */
111
/* TRACE takes a parenthesised printf argument list, e.g.
   TRACE(("level %d\n", level)); it expands to nothing unless
   VERBOSE is defined */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
117
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000118/* -------------------------------------------------------------------- */
119/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000120
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000121/* default character predicates (run sre_chars.py to regenerate tables) */
122
/* bit flags stored per character in sre_char_info[] */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* classification flags for the 128 ASCII code points, 16 per row */
static char sre_char_info[128] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /* 0x60 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lower-case mapping for the 128 ASCII code points ('A'-'Z' map to
   'a'-'z', everything else maps to itself), 16 per row */
static char sre_char_lower[128] = {
    /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /* 0x40 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
               93, 94, 95,
    /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
               124, 125, 126, 127
};

/* table lookup shared by the predicates below: yields the selected flag
   bit (non-zero) if set, and 0 for anything outside the table.  note
   that the argument is evaluated twice */
#define SRE_CHAR_INFO_TEST(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & mask) : 0)

#define SRE_IS_DIGIT(ch) SRE_CHAR_INFO_TEST(ch, SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch) SRE_CHAR_INFO_TEST(ch, SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_INFO_TEST(ch, SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch) SRE_CHAR_INFO_TEST(ch, SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch) SRE_CHAR_INFO_TEST(ch, SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000157
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000158static unsigned int sre_lower(unsigned int ch)
159{
160 return ((ch) < 128 ? sre_char_lower[ch] : ch);
161}
162
/* locale-specific character predicates.  these defer to <ctype.h> for
   code points below 256 and report "no match" for anything larger */

/* apply the ctype test to ch if it is below 256, otherwise yield 0.
   note that the argument is evaluated twice */
#define SRE_LOC_CTYPE(test, ch) ((ch) < 256 ? test((ch)) : 0)

#define SRE_LOC_IS_DIGIT(ch) SRE_LOC_CTYPE(isdigit, ch)
#define SRE_LOC_IS_SPACE(ch) SRE_LOC_CTYPE(isspace, ch)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) SRE_LOC_CTYPE(isalnum, ch)
/* a word character is an alphanumeric or an underscore */
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
170
static unsigned int sre_lower_locale(unsigned int ch)
{
    /* lower-case via the C library's tolower() for code points below
       256; larger values are returned unchanged */
    if (ch < 256)
        return (unsigned int) tolower((int) ch);
    return ch;
}
175
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000176/* unicode-specific character predicates */
177
#ifdef HAVE_UNICODE

/* each predicate casts its argument to Py_UNICODE and forwards it to
   the corresponding Py_UNICODE_* classification macro */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
/* a word character is an alphanumeric or an underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

static unsigned int sre_lower_unicode(unsigned int ch)
{
    /* lower-case through Py_UNICODE_TOLOWER; the argument is first
       narrowed to Py_UNICODE */
    Py_UNICODE code = (Py_UNICODE)(ch);

    return (unsigned int) Py_UNICODE_TOLOWER(code);
}

#endif /* HAVE_UNICODE */
192
Guido van Rossumb700df92000-03-31 14:59:30 +0000193LOCAL(int)
194sre_category(SRE_CODE category, unsigned int ch)
195{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000196 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000197
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 case SRE_CATEGORY_DIGIT:
199 return SRE_IS_DIGIT(ch);
200 case SRE_CATEGORY_NOT_DIGIT:
201 return !SRE_IS_DIGIT(ch);
202 case SRE_CATEGORY_SPACE:
203 return SRE_IS_SPACE(ch);
204 case SRE_CATEGORY_NOT_SPACE:
205 return !SRE_IS_SPACE(ch);
206 case SRE_CATEGORY_WORD:
207 return SRE_IS_WORD(ch);
208 case SRE_CATEGORY_NOT_WORD:
209 return !SRE_IS_WORD(ch);
210 case SRE_CATEGORY_LINEBREAK:
211 return SRE_IS_LINEBREAK(ch);
212 case SRE_CATEGORY_NOT_LINEBREAK:
213 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000214
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000215 case SRE_CATEGORY_LOC_WORD:
216 return SRE_LOC_IS_WORD(ch);
217 case SRE_CATEGORY_LOC_NOT_WORD:
218 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000219
220#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000221 case SRE_CATEGORY_UNI_DIGIT:
222 return SRE_UNI_IS_DIGIT(ch);
223 case SRE_CATEGORY_UNI_NOT_DIGIT:
224 return !SRE_UNI_IS_DIGIT(ch);
225 case SRE_CATEGORY_UNI_SPACE:
226 return SRE_UNI_IS_SPACE(ch);
227 case SRE_CATEGORY_UNI_NOT_SPACE:
228 return !SRE_UNI_IS_SPACE(ch);
229 case SRE_CATEGORY_UNI_WORD:
230 return SRE_UNI_IS_WORD(ch);
231 case SRE_CATEGORY_UNI_NOT_WORD:
232 return !SRE_UNI_IS_WORD(ch);
233 case SRE_CATEGORY_UNI_LINEBREAK:
234 return SRE_UNI_IS_LINEBREAK(ch);
235 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
236 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000237#else
238 case SRE_CATEGORY_UNI_DIGIT:
239 return SRE_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_NOT_DIGIT:
241 return !SRE_IS_DIGIT(ch);
242 case SRE_CATEGORY_UNI_SPACE:
243 return SRE_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_NOT_SPACE:
245 return !SRE_IS_SPACE(ch);
246 case SRE_CATEGORY_UNI_WORD:
247 return SRE_LOC_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_NOT_WORD:
249 return !SRE_LOC_IS_WORD(ch);
250 case SRE_CATEGORY_UNI_LINEBREAK:
251 return SRE_IS_LINEBREAK(ch);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
253 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000254#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000255 }
256 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000257}
258
259/* helpers */
260
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000261static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000262mark_fini(SRE_STATE* state)
263{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000264 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000265 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000266 state->mark_stack = NULL;
267 }
268 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000269}
270
271static int
272mark_save(SRE_STATE* state, int lo, int hi)
273{
274 void* stack;
275 int size;
276 int minsize, newsize;
277
278 if (hi <= lo)
279 return 0;
280
281 size = (hi - lo) + 1;
282
283 newsize = state->mark_stack_size;
284 minsize = state->mark_stack_base + size;
285
286 if (newsize < minsize) {
287 /* create new stack */
288 if (!newsize) {
289 newsize = 512;
290 if (newsize < minsize)
291 newsize = minsize;
292 TRACE(("allocate stack %d\n", newsize));
293 stack = malloc(sizeof(void*) * newsize);
294 } else {
295 /* grow the stack */
296 while (newsize < minsize)
297 newsize += newsize;
298 TRACE(("grow stack to %d\n", newsize));
299 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
300 }
301 if (!stack) {
302 mark_fini(state);
303 return SRE_ERROR_MEMORY;
304 }
305 state->mark_stack = stack;
306 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000307 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000308
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000309 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000310
311 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
312 size * sizeof(void*));
313
314 state->mark_stack_base += size;
315
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000316 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000317}
318
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000319static int
320mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000321{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000322 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000323
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000324 if (hi <= lo)
325 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000326
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000327 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000328
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000329 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000330
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000331 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000332
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000333 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
334 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000335
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000336 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000337}
338
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000339/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
341#define SRE_CHAR unsigned char
342#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000343#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000344#define SRE_CHARSET sre_charset
345#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000346#define SRE_MATCH sre_match
347#define SRE_SEARCH sre_search
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000348
349#if defined(HAVE_UNICODE)
350
Guido van Rossumb700df92000-03-31 14:59:30 +0000351#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000352#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000353#undef SRE_RECURSIVE
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000354
Guido van Rossumb700df92000-03-31 14:59:30 +0000355#undef SRE_SEARCH
356#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000357#undef SRE_INFO
358#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000359#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000360#undef SRE_AT
361#undef SRE_CHAR
362
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000363/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000364
365#define SRE_CHAR Py_UNICODE
366#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000367#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000368#define SRE_CHARSET sre_ucharset
369#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000370#define SRE_MATCH sre_umatch
371#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000372#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000373
374#endif /* SRE_RECURSIVE */
375
376/* -------------------------------------------------------------------- */
377/* String matching engine */
378
379/* the following section is compiled twice, with different character
380 settings */
381
382LOCAL(int)
383SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
384{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000387 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000389 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000390
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000391 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000392 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 case SRE_AT_BEGINNING_LINE:
396 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000397 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000400 return (((void*) (ptr+1) == state->end &&
401 SRE_IS_LINEBREAK((int) ptr[0])) ||
402 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000403
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 case SRE_AT_END_LINE:
405 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000406 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000407
Fredrik Lundh770617b2001-01-14 15:06:11 +0000408 case SRE_AT_END_STRING:
409 return ((void*) ptr == state->end);
410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 case SRE_AT_BOUNDARY:
412 if (state->beginning == state->end)
413 return 0;
414 that = ((void*) ptr > state->beginning) ?
415 SRE_IS_WORD((int) ptr[-1]) : 0;
416 this = ((void*) ptr < state->end) ?
417 SRE_IS_WORD((int) ptr[0]) : 0;
418 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000420 case SRE_AT_NON_BOUNDARY:
421 if (state->beginning == state->end)
422 return 0;
423 that = ((void*) ptr > state->beginning) ?
424 SRE_IS_WORD((int) ptr[-1]) : 0;
425 this = ((void*) ptr < state->end) ?
426 SRE_IS_WORD((int) ptr[0]) : 0;
427 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000428
429 case SRE_AT_LOC_BOUNDARY:
430 if (state->beginning == state->end)
431 return 0;
432 that = ((void*) ptr > state->beginning) ?
433 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
434 this = ((void*) ptr < state->end) ?
435 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
436 return this != that;
437
438 case SRE_AT_LOC_NON_BOUNDARY:
439 if (state->beginning == state->end)
440 return 0;
441 that = ((void*) ptr > state->beginning) ?
442 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
443 this = ((void*) ptr < state->end) ?
444 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
445 return this == that;
446
447 case SRE_AT_UNI_BOUNDARY:
448 if (state->beginning == state->end)
449 return 0;
450 that = ((void*) ptr > state->beginning) ?
451 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
452 this = ((void*) ptr < state->end) ?
453 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
454 return this != that;
455
456 case SRE_AT_UNI_NON_BOUNDARY:
457 if (state->beginning == state->end)
458 return 0;
459 that = ((void*) ptr > state->beginning) ?
460 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
461 this = ((void*) ptr < state->end) ?
462 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
463 return this == that;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000464 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000465
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000466 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000467}
468
469LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000470SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000471{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000472 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000473
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000474 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000475
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000476 for (;;) {
477 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000478
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000479 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000480 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000481 if (ch == set[0])
482 return ok;
483 set++;
484 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000485
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000486 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000487 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000488 if (set[0] <= ch && ch <= set[1])
489 return ok;
490 set += 2;
491 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000492
Fredrik Lundh3562f112000-07-02 12:00:07 +0000493 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000494 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000495 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
496 return ok;
497 set += 16;
498 break;
499
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000501 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000502 if (sre_category(set[0], (int) ch))
503 return ok;
504 set += 1;
505 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000506
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000507 case SRE_OP_NEGATE:
508 ok = !ok;
509 break;
510
511 case SRE_OP_FAILURE:
512 return !ok;
513
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000514 default:
515 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000516 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000517 return 0;
518 }
519 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000520}
521
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000522LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
523
524LOCAL(int)
525SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
526{
527 SRE_CODE chr;
528 SRE_CHAR* ptr = state->ptr;
529 SRE_CHAR* end = state->end;
530 int i;
531
532 /* adjust end */
533 if (maxcount < end - ptr && maxcount != 65535)
534 end = ptr + maxcount;
535
536 switch (pattern[0]) {
537
538 case SRE_OP_ANY:
539 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000540 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000541 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
542 ptr++;
543 break;
544
545 case SRE_OP_ANY_ALL:
546 /* repeated dot wildcare. skip to the end of the target
547 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000548 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000549 ptr = end;
550 break;
551
552 case SRE_OP_LITERAL:
553 /* repeated literal */
554 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000555 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000556 while (ptr < end && (SRE_CODE) *ptr == chr)
557 ptr++;
558 break;
559
560 case SRE_OP_LITERAL_IGNORE:
561 /* repeated literal */
562 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000563 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000564 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
565 ptr++;
566 break;
567
568 case SRE_OP_NOT_LITERAL:
569 /* repeated non-literal */
570 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000571 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000572 while (ptr < end && (SRE_CODE) *ptr != chr)
573 ptr++;
574 break;
575
576 case SRE_OP_NOT_LITERAL_IGNORE:
577 /* repeated non-literal */
578 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000579 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000580 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
581 ptr++;
582 break;
583
584 case SRE_OP_IN:
585 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000586 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
587 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000588 ptr++;
589 break;
590
591 default:
592 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000593 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000594 while ((SRE_CHAR*) state->ptr < end) {
595 i = SRE_MATCH(state, pattern, level);
596 if (i < 0)
597 return i;
598 if (!i)
599 break;
600 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000601 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
602 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000603 return (SRE_CHAR*) state->ptr - ptr;
604 }
605
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000606 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000607 return ptr - (SRE_CHAR*) state->ptr;
608}
609
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* text = state->ptr;
    SRE_CHAR* limit = state->end;
    int n;

    /* check minimal length */
    if (pattern[3] && (limit - text) < pattern[3])
        return 0;

    /* without a known prefix of more than one character there is
       nothing further to check */
    if (!(pattern[2] & SRE_INFO_PREFIX) || pattern[5] <= 1)
        return pattern[0];

    /* check known prefix */
    /* <length> <skip> <prefix data> <overlap data> */
    for (n = 0; n < pattern[5]; n++) {
        if ((SRE_CODE) text[n] != pattern[7 + n])
            return 0;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000637
638LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000639SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000640{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000641 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000642 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000643
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000644 SRE_CHAR* end = state->end;
645 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000646 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000647 SRE_REPEAT* rp;
648 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000649 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000650
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000651 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000652
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000653 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000654
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000655#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000656 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000657 return SRE_ERROR_RECURSION_LIMIT;
658#endif
659
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000660#if defined(USE_RECURSION_LIMIT)
661 if (level > USE_RECURSION_LIMIT)
662 return SRE_ERROR_RECURSION_LIMIT;
663#endif
664
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000665 if (pattern[0] == SRE_OP_INFO) {
666 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000667 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000668 if (pattern[3] && (end - ptr) < pattern[3]) {
669 TRACE(("reject (got %d chars, need %d)\n",
670 (end - ptr), pattern[3]));
671 return 0;
672 }
673 pattern += pattern[1] + 1;
674 }
675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000677
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000679
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000680 case SRE_OP_FAILURE:
681 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000682 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000685 case SRE_OP_SUCCESS:
686 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000687 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000688 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000689 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000690
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000691 case SRE_OP_AT:
692 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000693 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000694 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000696 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000697 pattern++;
698 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000699
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000700 case SRE_OP_CATEGORY:
701 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000702 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000703 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000704 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000705 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000706 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000707 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000708 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000709
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710 case SRE_OP_LITERAL:
711 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000712 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000713 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000714 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000715 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000716 pattern++;
717 ptr++;
718 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000719
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000720 case SRE_OP_NOT_LITERAL:
721 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000722 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000723 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000724 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000725 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000726 pattern++;
727 ptr++;
728 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000729
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000730 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000731 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000732 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000733 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000734 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
735 return 0;
736 ptr++;
737 break;
738
739 case SRE_OP_ANY_ALL:
740 /* match anything */
741 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000742 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000744 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 ptr++;
746 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000747
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 case SRE_OP_IN:
749 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000750 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000751 TRACE(("|%p|%p|IN\n", pattern, ptr));
752 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000753 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000754 pattern += pattern[0];
755 ptr++;
756 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000757
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000758 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000760 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000761 i = pattern[0];
762 {
763 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
764 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
765 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000766 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 while (p < e) {
768 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000769 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000770 p++; ptr++;
771 }
772 }
773 pattern++;
774 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000775
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000776 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000777 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000778 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000779 i = pattern[0];
780 {
781 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
782 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
783 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000784 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000785 while (p < e) {
786 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000787 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000788 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000789 p++; ptr++;
790 }
791 }
792 pattern++;
793 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000794
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000795 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000796 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000797 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000798 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000799 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 pattern++;
801 ptr++;
802 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000803
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000805 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000806 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000807 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000808 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000809 pattern++;
810 ptr++;
811 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000812
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000813 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000814 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000816 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000817 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000818 pattern += pattern[0];
819 ptr++;
820 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000821
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000822 case SRE_OP_MARK:
823 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000824 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000825 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000826 i = pattern[0];
827 if (i & 1)
828 state->lastindex = i/2 + 1;
829 if (i > state->lastmark)
830 state->lastmark = i;
831 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000832 pattern++;
833 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000834
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000835 case SRE_OP_JUMP:
836 case SRE_OP_INFO:
837 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000838 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000839 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000840 pattern += pattern[0];
841 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000842
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000843 case SRE_OP_ASSERT:
844 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000845 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000846 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000847 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000848 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000849 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000850 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000851 if (i <= 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000852 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000853 pattern += pattern[0];
854 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000855
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000856 case SRE_OP_ASSERT_NOT:
857 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000858 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000859 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000860 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000861 if (state->ptr >= state->beginning) {
862 i = SRE_MATCH(state, pattern + 2, level + 1);
863 if (i < 0)
864 return i;
865 if (i)
866 return 0;
867 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000868 pattern += pattern[0];
869 break;
870
871 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000872 /* alternation */
873 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000874 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000875 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000876 for (; pattern[0]; pattern += pattern[0]) {
877 if (pattern[1] == SRE_OP_LITERAL &&
878 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
879 continue;
880 if (pattern[1] == SRE_OP_IN &&
881 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
882 continue;
883 state->ptr = ptr;
884 i = SRE_MATCH(state, pattern + 1, level + 1);
885 if (i)
886 return i;
887 if (state->lastmark > lastmark) {
888 memset(
889 state->mark + lastmark + 1, 0,
890 (state->lastmark - lastmark) * sizeof(void*)
891 );
892 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000893 }
894 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000895 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000896
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000897 case SRE_OP_REPEAT_ONE:
898 /* match repeated sequence (maximizing regexp) */
899
900 /* this operator only works if the repeated item is
901 exactly one character wide, and we're not already
902 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000903 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000904
905 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
906
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000907 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000908 pattern[1], pattern[2]));
909
Fredrik Lundhe1869832000-08-01 22:47:49 +0000910 if (ptr + pattern[1] > end)
911 return 0; /* cannot match */
912
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000913 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000914
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000915 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
916 if (count < 0)
917 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000918
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000919 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000920
921 /* when we arrive here, count contains the number of
922 matches, and ptr points to the tail of the target
923 string. check if the rest of the pattern matches,
924 and backtrack if not. */
925
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000926 if (count < (int) pattern[1])
927 return 0;
928
929 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
930 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000931 state->ptr = ptr;
932 return 1;
933
934 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
935 /* tail starts with a literal. skip positions where
936 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000937 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000938 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000939 while (count >= (int) pattern[1] &&
940 (ptr >= end || *ptr != chr)) {
941 ptr--;
942 count--;
943 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000944 if (count < (int) pattern[1])
945 break;
946 state->ptr = ptr;
947 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000948 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000949 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000950 ptr--;
951 count--;
952 }
953
954 } else {
955 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000956 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000957 while (count >= (int) pattern[1]) {
958 state->ptr = ptr;
959 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000960 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000961 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000962 ptr--;
963 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000964 if (state->lastmark > lastmark) {
965 memset(
966 state->mark + lastmark + 1, 0,
967 (state->lastmark - lastmark) * sizeof(void*)
968 );
969 state->lastmark = lastmark;
970 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000971 }
972 }
973 return 0;
974
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000975 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000976 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +0000977 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000978 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000979 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000980 pattern[1], pattern[2]));
981
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000982 rep.count = -1;
983 rep.pattern = pattern;
984
985 /* install new repeat context */
986 rep.prev = state->repeat;
987 state->repeat = &rep;
988
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000989 state->ptr = ptr;
990 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000991
992 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000993
994 return i;
995
996 case SRE_OP_MAX_UNTIL:
997 /* maximizing repeat */
998 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
999
1000 /* FIXME: we probably need to deal with zero-width
1001 matches in here... */
1002
1003 rp = state->repeat;
1004 if (!rp)
1005 return SRE_ERROR_STATE;
1006
1007 state->ptr = ptr;
1008
1009 count = rp->count + 1;
1010
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001011 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001012
1013 if (count < rp->pattern[1]) {
1014 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001015 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001016 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001017 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001018 if (i)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001019 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001020 rp->count = count - 1;
1021 state->ptr = ptr;
1022 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001023 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001024
1025 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001026 /* we may have enough matches, but if we can
1027 match another item, do so */
1028 rp->count = count;
1029 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001030 i = mark_save(state, 0, lastmark);
1031 if (i < 0)
1032 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001033 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001034 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001035 if (i)
1036 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001037 i = mark_restore(state, 0, lastmark);
1038 if (i < 0)
1039 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001040 rp->count = count - 1;
1041 state->ptr = ptr;
1042 }
1043
1044 /* cannot match more repeated items here. make sure the
1045 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001046 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001047 i = SRE_MATCH(state, pattern, level + 1);
1048 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001049 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001050 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001051 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001052 return 0;
1053
1054 case SRE_OP_MIN_UNTIL:
1055 /* minimizing repeat */
1056 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1057
1058 rp = state->repeat;
1059 if (!rp)
1060 return SRE_ERROR_STATE;
1061
1062 count = rp->count + 1;
1063
Fredrik Lundh770617b2001-01-14 15:06:11 +00001064 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1065 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001066
1067 state->ptr = ptr;
1068
1069 if (count < rp->pattern[1]) {
1070 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001071 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001072 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001073 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001074 if (i)
1075 return i;
1076 rp->count = count-1;
1077 state->ptr = ptr;
1078 return 0;
1079 }
1080
1081 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001082 state->repeat = rp->prev;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +00001083 /* FIXME: the following fix doesn't always work (#133283) */
1084 if (0 && rp->pattern[2] == 65535) {
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001085 /* unbounded repeat */
1086 for (;;) {
1087 i = SRE_MATCH(state, pattern, level + 1);
1088 if (i || ptr >= end)
1089 break;
1090 state->ptr = ++ptr;
1091 }
1092 } else
1093 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001094 if (i) {
1095 /* free(rp); */
1096 return i;
1097 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001098
Fredrik Lundh770617b2001-01-14 15:06:11 +00001099 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001100 state->repeat = rp;
1101
1102 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1103 return 0;
1104
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001105 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001106 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001107 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001108 if (i)
1109 return i;
1110 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001111 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001112 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001113
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001114 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001115 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001116 return SRE_ERROR_ILLEGAL;
1117 }
1118 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001119
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001120 /* shouldn't end up here */
1121 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001122}
1123
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001124LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001125SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1126{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001127 SRE_CHAR* ptr = state->start;
1128 SRE_CHAR* end = state->end;
1129 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001130 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001131 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001132 SRE_CODE* prefix = NULL;
1133 SRE_CODE* charset = NULL;
1134 SRE_CODE* overlap = NULL;
1135 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001136
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001137 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001138 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001139 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001140
1141 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001142
1143 if (pattern[3] > 0) {
1144 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001145 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001146 end -= pattern[3]-1;
1147 if (end <= ptr)
1148 end = ptr+1;
1149 }
1150
Fredrik Lundh3562f112000-07-02 12:00:07 +00001151 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001152 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001153 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001154 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001155 prefix_skip = pattern[6];
1156 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001157 overlap = prefix + prefix_len - 1;
1158 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001159 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001160 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001161 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001162
1163 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001164 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001165
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001166 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1167 TRACE(("charset = %p\n", charset));
1168
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001169#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001170 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001171 /* pattern starts with a known prefix. use the overlap
1172 table to skip forward as fast as we possibly can */
1173 int i = 0;
1174 end = state->end;
1175 while (ptr < end) {
1176 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001177 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001178 if (!i)
1179 break;
1180 else
1181 i = overlap[i];
1182 } else {
1183 if (++i == prefix_len) {
1184 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001185 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1186 state->start = ptr + 1 - prefix_len;
1187 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001188 if (flags & SRE_INFO_LITERAL)
1189 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001190 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001191 if (status != 0)
1192 return status;
1193 /* close but no cigar -- try again */
1194 i = overlap[i];
1195 }
1196 break;
1197 }
1198
1199 }
1200 ptr++;
1201 }
1202 return 0;
1203 }
1204#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001205
Fredrik Lundh3562f112000-07-02 12:00:07 +00001206 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001207 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001208 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001209 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001210 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001211 for (;;) {
1212 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1213 ptr++;
1214 if (ptr == end)
1215 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001216 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001217 state->start = ptr;
1218 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001219 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001220 if (status != 0)
1221 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001222 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001223 } else if (charset) {
1224 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001225 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001226 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001227 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001228 ptr++;
1229 if (ptr == end)
1230 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001231 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001232 state->start = ptr;
1233 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001234 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001235 if (status != 0)
1236 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001237 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001238 }
1239 } else
1240 /* general case */
1241 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001242 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001243 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001244 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001245 if (status != 0)
1246 break;
1247 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001248
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001249 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001250}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001251
Guido van Rossumb700df92000-03-31 14:59:30 +00001252
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001253#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001254
1255/* -------------------------------------------------------------------- */
1256/* factories and destructors */
1257
1258/* see sre.h for object declarations */
1259
1260staticforward PyTypeObject Pattern_Type;
1261staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001262staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001263
1264static PyObject *
1265_compile(PyObject* self_, PyObject* args)
1266{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001267 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001268
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001269 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001270 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001271
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001272 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001273 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001274 PyObject* code;
1275 int groups = 0;
1276 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001277 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001278 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1279 &PyList_Type, &code, &groups,
1280 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001281 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001282
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001283 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001284
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001285 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001286 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001287 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001288
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001289 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001290 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001291 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001292 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001293
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001294 if (PyErr_Occurred()) {
1295 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001296 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001297 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001298
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001299 Py_INCREF(pattern);
1300 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001301
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001302 self->flags = flags;
1303
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001304 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001305
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001306 Py_XINCREF(groupindex);
1307 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001308
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001309 Py_XINCREF(indexgroup);
1310 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001311
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001312 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001313}
1314
1315static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001316sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001317{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001318 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001319}
1320
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001321static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001322sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001323{
1324 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001325 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001326 return NULL;
1327 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001328 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001329 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001330#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001331 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001332#else
1333 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001334#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001335 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001336}
1337
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001338LOCAL(void)
1339state_reset(SRE_STATE* state)
1340{
1341 int i;
1342
1343 state->lastmark = 0;
1344
1345 /* FIXME: dynamic! */
1346 for (i = 0; i < SRE_MARK_SIZE; i++)
1347 state->mark[i] = NULL;
1348
1349 state->lastindex = -1;
1350
1351 state->repeat = NULL;
1352
1353 mark_fini(state);
1354}
1355
Guido van Rossumb700df92000-03-31 14:59:30 +00001356LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1358 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001359{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001360 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001361
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001362 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001363 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001364 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001365
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001366 memset(state, 0, sizeof(SRE_STATE));
1367
1368 state->lastindex = -1;
1369
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001370#if defined(HAVE_UNICODE)
1371 if (PyUnicode_Check(string)) {
1372 /* unicode strings doesn't always support the buffer interface */
1373 ptr = (void*) PyUnicode_AS_DATA(string);
1374 bytes = PyUnicode_GET_DATA_SIZE(string);
1375 size = PyUnicode_GET_SIZE(string);
1376 state->charsize = sizeof(Py_UNICODE);
1377
1378 } else {
1379#endif
1380
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001381 /* get pointer to string buffer */
1382 buffer = string->ob_type->tp_as_buffer;
1383 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1384 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001385 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001386 return NULL;
1387 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001389 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001390 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1391 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001392 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1393 return NULL;
1394 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001395
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001396 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001397#if PY_VERSION_HEX >= 0x01060000
1398 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001399#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001400 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001401#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001402
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001403 if (PyString_Check(string) || bytes == size)
1404 state->charsize = 1;
1405#if defined(HAVE_UNICODE)
1406 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1407 state->charsize = sizeof(Py_UNICODE);
1408#endif
1409 else {
1410 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1411 return NULL;
1412 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001413
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001414#if defined(HAVE_UNICODE)
1415 }
1416#endif
1417
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001418 /* adjust boundaries */
1419 if (start < 0)
1420 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001421 else if (start > size)
1422 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001423
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001424 if (end < 0)
1425 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001426 else if (end > size)
1427 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001429 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001430
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001431 state->start = (void*) ((char*) ptr + start * state->charsize);
1432 state->end = (void*) ((char*) ptr + end * state->charsize);
1433
1434 Py_INCREF(string);
1435 state->string = string;
1436 state->pos = start;
1437 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001438
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001439 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001440 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001441 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001442#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001443 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001444#else
1445 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001446#endif
1447 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001448 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001449
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001450 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001451}
1452
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001453LOCAL(void)
1454state_fini(SRE_STATE* state)
1455{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001456 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001457 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001458}
1459
1460LOCAL(PyObject*)
1461state_getslice(SRE_STATE* state, int index, PyObject* string)
1462{
Fredrik Lundh58100642000-08-09 09:14:35 +00001463 int i, j;
1464
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001465 index = (index - 1) * 2;
1466
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001467 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001468 i = j = 0;
1469 } else {
1470 i = ((char*)state->mark[index] - (char*)state->beginning) /
1471 state->charsize;
1472 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1473 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001474 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001475
Fredrik Lundh58100642000-08-09 09:14:35 +00001476 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001477}
1478
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001479static void
1480pattern_error(int status)
1481{
1482 switch (status) {
1483 case SRE_ERROR_RECURSION_LIMIT:
1484 PyErr_SetString(
1485 PyExc_RuntimeError,
1486 "maximum recursion limit exceeded"
1487 );
1488 break;
1489 case SRE_ERROR_MEMORY:
1490 PyErr_NoMemory();
1491 break;
1492 default:
1493 /* other error codes indicate compiler/engine bugs */
1494 PyErr_SetString(
1495 PyExc_RuntimeError,
1496 "internal error in regular expression engine"
1497 );
1498 }
1499}
1500
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001501static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001502pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001503{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001504 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001505
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001506 MatchObject* match;
1507 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001508 char* base;
1509 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001510
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001511 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001512
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001513 /* create match object (with room for extra group marks) */
1514 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001515 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001516 if (!match)
1517 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001518
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001519 Py_INCREF(pattern);
1520 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001521
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001522 Py_INCREF(state->string);
1523 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001524
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001525 match->regs = NULL;
1526 match->groups = pattern->groups+1;
1527
1528 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001529
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001530 base = (char*) state->beginning;
1531 n = state->charsize;
1532
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001533 match->mark[0] = ((char*) state->start - base) / n;
1534 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001535
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001536 for (i = j = 0; i < pattern->groups; i++, j+=2)
1537 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1538 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1539 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1540 } else
1541 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1542
1543 match->pos = state->pos;
1544 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001545
Fredrik Lundh6f013982000-07-03 18:44:21 +00001546 match->lastindex = state->lastindex;
1547
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001548 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001549
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001550 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001551
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001552 /* no match */
1553 Py_INCREF(Py_None);
1554 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001555
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001556 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001557
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001558 /* internal error */
1559 pattern_error(status);
1560 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001561}
1562
1563static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001564pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001565{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001566 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001567
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001568 ScannerObject* self;
1569
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001570 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001571 int start = 0;
1572 int end = INT_MAX;
1573 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1574 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001575
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001576 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001577 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001578 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001579 return NULL;
1580
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001581 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001582 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001583 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001584 return NULL;
1585 }
1586
1587 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001588 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001589
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001590 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001591}
1592
Guido van Rossumb700df92000-03-31 14:59:30 +00001593static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001594pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001595{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001596 Py_XDECREF(self->pattern);
1597 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001598 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001599 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001600}
1601
1602static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001603pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001604{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001605 SRE_STATE state;
1606 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001607
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001608 PyObject* string;
1609 int start = 0;
1610 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001611 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1612 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1613 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001614 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001615
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001616 string = state_init(&state, self, string, start, end);
1617 if (!string)
1618 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001619
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001620 state.ptr = state.start;
1621
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001622 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1623
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001624 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001625 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001626 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001627#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001628 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001629#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001630 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001631
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001632 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1633
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001634 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001635
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001636 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001637}
1638
1639static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001640pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001641{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001642 SRE_STATE state;
1643 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001644
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001645 PyObject* string;
1646 int start = 0;
1647 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001648 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1649 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1650 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001651 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001652
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001653 string = state_init(&state, self, string, start, end);
1654 if (!string)
1655 return NULL;
1656
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001657 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1658
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001659 if (state.charsize == 1) {
1660 status = sre_search(&state, PatternObject_GetCode(self));
1661 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001662#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001663 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001664#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001665 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001666
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001667 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1668
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001669 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001670
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001671 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001672}
1673
1674static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001675call(char* function, PyObject* args)
1676{
1677 PyObject* name;
1678 PyObject* module;
1679 PyObject* func;
1680 PyObject* result;
1681
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001682 name = PyString_FromString(SRE_MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001683 if (!name)
1684 return NULL;
1685 module = PyImport_Import(name);
1686 Py_DECREF(name);
1687 if (!module)
1688 return NULL;
1689 func = PyObject_GetAttrString(module, function);
1690 Py_DECREF(module);
1691 if (!func)
1692 return NULL;
1693 result = PyObject_CallObject(func, args);
1694 Py_DECREF(func);
1695 Py_DECREF(args);
1696 return result;
1697}
1698
1699static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001700pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001701{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 PyObject* template;
1703 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001704 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001705 static char* kwlist[] = { "repl", "string", "count", NULL };
1706 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1707 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001708 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001709
1710 /* delegate to Python code */
1711 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1712}
1713
1714static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001715pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001716{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001717 PyObject* template;
1718 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001719 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001720 static char* kwlist[] = { "repl", "string", "count", NULL };
1721 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1722 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001723 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001724
1725 /* delegate to Python code */
1726 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1727}
1728
1729static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001730pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001731{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001732 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001733 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001734 static char* kwlist[] = { "source", "maxsplit", NULL };
1735 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1736 &string, &maxsplit))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001737 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001738
1739 /* delegate to Python code */
1740 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1741}
1742
1743static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001744pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001745{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001746 SRE_STATE state;
1747 PyObject* list;
1748 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001749 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001750
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001751 PyObject* string;
1752 int start = 0;
1753 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001754 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1755 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1756 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001757 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001759 string = state_init(&state, self, string, start, end);
1760 if (!string)
1761 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001762
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001763 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001764
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001765 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001766
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001767 PyObject* item;
1768
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001769 state_reset(&state);
1770
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001771 state.ptr = state.start;
1772
1773 if (state.charsize == 1) {
1774 status = sre_search(&state, PatternObject_GetCode(self));
1775 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001776#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001777 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001778#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001779 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001780
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001781 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001782
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001783 /* don't bother to build a match object */
1784 switch (self->groups) {
1785 case 0:
1786 item = PySequence_GetSlice(
1787 string,
1788 ((char*) state.start - (char*) state.beginning) /
1789 state.charsize,
1790 ((char*) state.ptr - (char*) state.beginning) /
1791 state.charsize);
1792 if (!item)
1793 goto error;
1794 break;
1795 case 1:
1796 item = state_getslice(&state, 1, string);
1797 if (!item)
1798 goto error;
1799 break;
1800 default:
1801 item = PyTuple_New(self->groups);
1802 if (!item)
1803 goto error;
1804 for (i = 0; i < self->groups; i++) {
1805 PyObject* o = state_getslice(&state, i+1, string);
1806 if (!o) {
1807 Py_DECREF(item);
1808 goto error;
1809 }
1810 PyTuple_SET_ITEM(item, i, o);
1811 }
1812 break;
1813 }
1814
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001815 status = PyList_Append(list, item);
1816 Py_DECREF(item);
1817
1818 if (status < 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001819 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001820
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001821 if (state.ptr == state.start)
1822 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001823 else
1824 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001825
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001826 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001827
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001828 if (status == 0)
1829 break;
1830
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001831 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001832 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001833
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001834 }
1835 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001836
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001837 state_fini(&state);
1838 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001839
1840error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001841 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001842 state_fini(&state);
1843 return NULL;
1844
Guido van Rossumb700df92000-03-31 14:59:30 +00001845}
1846
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001847static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00001848 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1849 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1850 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1851 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1852 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1853 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001854 /* experimental */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001855 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001856 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001857};
1858
1859static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001860pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001861{
1862 PyObject* res;
1863
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001864 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001865
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001866 if (res)
1867 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001868
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001869 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001870
1871 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001873 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001874 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001875 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001876
1877 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001878 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001879
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001880 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001881 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001882
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001883 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001884 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001885 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001886 }
1887
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001888 PyErr_SetString(PyExc_AttributeError, name);
1889 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001890}
1891
1892statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 PyObject_HEAD_INIT(NULL)
1894 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001895 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001896 (destructor)pattern_dealloc, /*tp_dealloc*/
1897 0, /*tp_print*/
1898 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001899};
1900
1901/* -------------------------------------------------------------------- */
1902/* match methods */
1903
1904static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001905match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001906{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001907 Py_XDECREF(self->regs);
1908 Py_XDECREF(self->string);
1909 Py_DECREF(self->pattern);
1910 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001911}
1912
1913static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001914match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001915{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001916 if (index < 0 || index >= self->groups) {
1917 /* raise IndexError if we were given a bad group number */
1918 PyErr_SetString(
1919 PyExc_IndexError,
1920 "no such group"
1921 );
1922 return NULL;
1923 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001924
Fredrik Lundh6f013982000-07-03 18:44:21 +00001925 index *= 2;
1926
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001927 if (self->string == Py_None || self->mark[index] < 0) {
1928 /* return default value if the string or group is undefined */
1929 Py_INCREF(def);
1930 return def;
1931 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001932
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001933 return PySequence_GetSlice(
1934 self->string, self->mark[index], self->mark[index+1]
1935 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001936}
1937
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001938static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001939match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001940{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001941 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001942
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001943 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001944 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001945
Fredrik Lundh6f013982000-07-03 18:44:21 +00001946 i = -1;
1947
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001948 if (self->pattern->groupindex) {
1949 index = PyObject_GetItem(self->pattern->groupindex, index);
1950 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001951 if (PyInt_Check(index))
1952 i = (int) PyInt_AS_LONG(index);
1953 Py_DECREF(index);
1954 } else
1955 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001956 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001957
1958 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001959}
1960
1961static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001962match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001963{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001964 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001965}
1966
1967static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001968match_expand(MatchObject* self, PyObject* args)
1969{
1970 PyObject* template;
1971 if (!PyArg_ParseTuple(args, "O:expand", &template))
1972 return NULL;
1973
1974 /* delegate to Python code */
1975 return call(
1976 "_expand",
1977 Py_BuildValue("OOO", self->pattern, self, template)
1978 );
1979}
1980
1981static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001982match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001983{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001984 PyObject* result;
1985 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001986
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001987 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001988
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001989 switch (size) {
1990 case 0:
1991 result = match_getslice(self, Py_False, Py_None);
1992 break;
1993 case 1:
1994 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1995 break;
1996 default:
1997 /* fetch multiple items */
1998 result = PyTuple_New(size);
1999 if (!result)
2000 return NULL;
2001 for (i = 0; i < size; i++) {
2002 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002003 self, PyTuple_GET_ITEM(args, i), Py_None
2004 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002005 if (!item) {
2006 Py_DECREF(result);
2007 return NULL;
2008 }
2009 PyTuple_SET_ITEM(result, i, item);
2010 }
2011 break;
2012 }
2013 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002014}
2015
2016static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002017match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002018{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002019 PyObject* result;
2020 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002021
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002022 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002023 static char* kwlist[] = { "default", NULL };
2024 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002025 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002026
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002027 result = PyTuple_New(self->groups-1);
2028 if (!result)
2029 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002030
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002031 for (index = 1; index < self->groups; index++) {
2032 PyObject* item;
2033 item = match_getslice_by_index(self, index, def);
2034 if (!item) {
2035 Py_DECREF(result);
2036 return NULL;
2037 }
2038 PyTuple_SET_ITEM(result, index-1, item);
2039 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002040
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002041 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002042}
2043
2044static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002045match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002046{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002047 PyObject* result;
2048 PyObject* keys;
2049 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002050
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002051 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002052 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002053 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002055
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002056 result = PyDict_New();
2057 if (!result || !self->pattern->groupindex)
2058 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002059
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002061 if (!keys)
2062 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002063
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002065 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002067 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002068 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002069 if (!key)
2070 goto failed;
2071 value = match_getslice(self, key, def);
2072 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002073 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002074 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002075 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002076 status = PyDict_SetItem(result, key, value);
2077 Py_DECREF(value);
2078 if (status < 0)
2079 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002080 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002081
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002083
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002084 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002085
2086failed:
2087 Py_DECREF(keys);
2088 Py_DECREF(result);
2089 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002090}
2091
2092static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002093match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002094{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002095 int index;
2096
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002097 PyObject* index_ = Py_False; /* zero */
2098 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2099 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002100
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002101 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002102
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002103 if (index < 0 || index >= self->groups) {
2104 PyErr_SetString(
2105 PyExc_IndexError,
2106 "no such group"
2107 );
2108 return NULL;
2109 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002110
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002111 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002112 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002113}
2114
2115static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002116match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002117{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002118 int index;
2119
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002120 PyObject* index_ = Py_False; /* zero */
2121 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2122 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002123
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002124 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002125
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002126 if (index < 0 || index >= self->groups) {
2127 PyErr_SetString(
2128 PyExc_IndexError,
2129 "no such group"
2130 );
2131 return NULL;
2132 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002133
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002134 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002135 return Py_BuildValue("i", self->mark[index*2+1]);
2136}
2137
2138LOCAL(PyObject*)
2139_pair(int i1, int i2)
2140{
2141 PyObject* pair;
2142 PyObject* item;
2143
2144 pair = PyTuple_New(2);
2145 if (!pair)
2146 return NULL;
2147
2148 item = PyInt_FromLong(i1);
2149 if (!item)
2150 goto error;
2151 PyTuple_SET_ITEM(pair, 0, item);
2152
2153 item = PyInt_FromLong(i2);
2154 if (!item)
2155 goto error;
2156 PyTuple_SET_ITEM(pair, 1, item);
2157
2158 return pair;
2159
2160 error:
2161 Py_DECREF(pair);
2162 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002163}
2164
2165static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002166match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002167{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002168 int index;
2169
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002170 PyObject* index_ = Py_False; /* zero */
2171 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2172 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002173
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002174 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002175
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002176 if (index < 0 || index >= self->groups) {
2177 PyErr_SetString(
2178 PyExc_IndexError,
2179 "no such group"
2180 );
2181 return NULL;
2182 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002183
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002184 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002185 return _pair(self->mark[index*2], self->mark[index*2+1]);
2186}
2187
2188static PyObject*
2189match_regs(MatchObject* self)
2190{
2191 PyObject* regs;
2192 PyObject* item;
2193 int index;
2194
2195 regs = PyTuple_New(self->groups);
2196 if (!regs)
2197 return NULL;
2198
2199 for (index = 0; index < self->groups; index++) {
2200 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2201 if (!item) {
2202 Py_DECREF(regs);
2203 return NULL;
2204 }
2205 PyTuple_SET_ITEM(regs, index, item);
2206 }
2207
2208 Py_INCREF(regs);
2209 self->regs = regs;
2210
2211 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002212}
2213
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002214static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002215 {"group", (PyCFunction) match_group, METH_VARARGS},
2216 {"start", (PyCFunction) match_start, METH_VARARGS},
2217 {"end", (PyCFunction) match_end, METH_VARARGS},
2218 {"span", (PyCFunction) match_span, METH_VARARGS},
2219 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2220 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2221 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002222 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002223};
2224
2225static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002226match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002227{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002228 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002229
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002230 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2231 if (res)
2232 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002233
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002234 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002235
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002236 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002237 if (self->lastindex >= 0)
2238 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002239 Py_INCREF(Py_None);
2240 return Py_None;
2241 }
2242
2243 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002244 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002245 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002246 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002247 );
2248 if (result)
2249 return result;
2250 PyErr_Clear();
2251 }
2252 Py_INCREF(Py_None);
2253 return Py_None;
2254 }
2255
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002256 if (!strcmp(name, "string")) {
2257 if (self->string) {
2258 Py_INCREF(self->string);
2259 return self->string;
2260 } else {
2261 Py_INCREF(Py_None);
2262 return Py_None;
2263 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002264 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002265
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002266 if (!strcmp(name, "regs")) {
2267 if (self->regs) {
2268 Py_INCREF(self->regs);
2269 return self->regs;
2270 } else
2271 return match_regs(self);
2272 }
2273
2274 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002275 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002276 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002277 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002278
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002279 if (!strcmp(name, "pos"))
2280 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002281
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002282 if (!strcmp(name, "endpos"))
2283 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002284
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002285 PyErr_SetString(PyExc_AttributeError, name);
2286 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002287}
2288
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2291
2292statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002293 PyObject_HEAD_INIT(NULL)
2294 0, "SRE_Match",
2295 sizeof(MatchObject), sizeof(int),
2296 (destructor)match_dealloc, /*tp_dealloc*/
2297 0, /*tp_print*/
2298 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002299};
2300
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002301/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002302/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002303
2304static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002305scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002306{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002307 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002308 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002309 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002310}
2311
2312static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002313scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002314{
2315 SRE_STATE* state = &self->state;
2316 PyObject* match;
2317 int status;
2318
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002319 state_reset(state);
2320
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002321 state->ptr = state->start;
2322
2323 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002324 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002325 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002326#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002327 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002328#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002329 }
2330
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002331 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002332 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002333
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002334 if (status == 0 || state->ptr == state->start)
2335 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002336 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002337 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002338
2339 return match;
2340}
2341
2342
2343static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002344scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002345{
2346 SRE_STATE* state = &self->state;
2347 PyObject* match;
2348 int status;
2349
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002350 state_reset(state);
2351
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002352 state->ptr = state->start;
2353
2354 if (state->charsize == 1) {
2355 status = sre_search(state, PatternObject_GetCode(self->pattern));
2356 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002357#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002358 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002359#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002360 }
2361
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002362 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002363 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002364
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002365 if (status == 0 || state->ptr == state->start)
2366 state->start = (void*) ((char*) state->ptr + state->charsize);
2367 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002368 state->start = state->ptr;
2369
2370 return match;
2371}
2372
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002373static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002374 {"match", (PyCFunction) scanner_match, 0},
2375 {"search", (PyCFunction) scanner_search, 0},
2376 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002377};
2378
2379static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002380scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002381{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002382 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002383
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002384 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2385 if (res)
2386 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002387
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002388 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002389
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002390 /* attributes */
2391 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002392 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002393 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002394 }
2395
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002396 PyErr_SetString(PyExc_AttributeError, name);
2397 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002398}
2399
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002400statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002401 PyObject_HEAD_INIT(NULL)
2402 0, "SRE_Scanner",
2403 sizeof(ScannerObject), 0,
2404 (destructor)scanner_dealloc, /*tp_dealloc*/
2405 0, /*tp_print*/
2406 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002407};
2408
Guido van Rossumb700df92000-03-31 14:59:30 +00002409static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002410 {"compile", _compile, 1},
2411 {"getcodesize", sre_codesize, 1},
2412 {"getlower", sre_getlower, 1},
2413 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002414};
2415
Tim Peters5687ffe2001-02-28 16:44:18 +00002416DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002417init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002418{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002419 PyObject* m;
2420 PyObject* d;
2421
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002422 /* Patch object types */
2423 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002424 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002425
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002426 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002427 d = PyModule_GetDict(m);
2428
2429 PyDict_SetItemString(
2430 d, "MAGIC", (PyObject*) PyInt_FromLong(SRE_MAGIC)
2431 );
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002432
2433 PyDict_SetItemString(
2434 d, "copyright", (PyObject*) PyString_FromString(copyright)
2435 );
2436
Guido van Rossumb700df92000-03-31 14:59:30 +00002437}
2438
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002439#endif /* !defined(SRE_RECURSIVE) */