blob: b72b8b2c1a6de4903f226abdfdc148f3b682772a [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of (0.5)
 * 2000-06-30 fl  added fast search optimization (0.9.3)
 * 2000-06-30 fl  added assert (lookahead) primitives, etc (0.9.4)
 * 2000-07-02 fl  added charset optimizations, etc (0.9.5)
 * 2000-07-03 fl  store code in pattern object, lookbehind, etc
 * 2000-07-08 fl  added regs attribute
 * 2000-07-21 fl  reset lastindex in scanner methods (0.9.6)
 * 2000-08-01 fl  fixes for 1.6b1 (0.9.8)
 * 2000-08-03 fl  added recursion limit
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-08-08 fl  changed findall to return empty strings instead of None
 * 2000-08-27 fl  properly propagate memory errors
 * 2000-09-02 fl  return -1 instead of None for start/end/span
 * 2000-09-20 fl  added expand method
 * 2000-09-21 fl  don't use the buffer interface for unicode strings
 * 2000-10-03 fl  fixed assert_not primitive; support keyword arguments
 *
 * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
35
36#ifndef SRE_RECURSIVE
37
/* version/copyright banner string (external linkage: not declared static) */
char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";

40#include "Python.h"
41
42#include "sre.h"
43
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000044#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000045
/* name of this module, minus the leading underscore */
#define MODULE "sre"

/* defining this one enables tracing */
#undef VERBOSE

#if PY_VERSION_HEX >= 0x01060000
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif

/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */

#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local function returning `type`, using
   the cheapest calling convention the compiler offers */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (all negative; parenthesized so they are safe inside
   larger expressions) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise; note the double parentheses */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* -------------------------------------------------------------------- */
/* search engine state */

/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* per-character bit flags for code points 0..127, built from the
   SRE_*_MASK values above (16 entries per row) */
static char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lower-case mapping for code points 0..127: identity everywhere
   except 'A'..'Z' (65..90), which map to 'a'..'z' (97..122) */
static char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* Lower-case `ch` using the table above; code points >= 128 are
   returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? sre_char_lower[ch] : ch);
}

/* table-driven predicates: look the flag up in sre_char_info for code
   points below 128, and yield 0 for everything else.  the result is the
   masked flag value, not a normalized 0/1.  `ch` is evaluated twice. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* locale-specific character predicates */

/* Lower-case `ch` with the C library's tolower() for values below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? tolower((ch)) : ch);
}
/* <ctype.h>-based predicates; values of 256 and above never match.
   `ch` is evaluated more than once. */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)
/* Lower-case `ch` via Py_UNICODE_TOLOWER; the argument is cast to
   Py_UNICODE before the lookup. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif

Guido van Rossumb700df92000-03-31 14:59:30 +0000175LOCAL(int)
176sre_category(SRE_CODE category, unsigned int ch)
177{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000178 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000179
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000180 case SRE_CATEGORY_DIGIT:
181 return SRE_IS_DIGIT(ch);
182 case SRE_CATEGORY_NOT_DIGIT:
183 return !SRE_IS_DIGIT(ch);
184 case SRE_CATEGORY_SPACE:
185 return SRE_IS_SPACE(ch);
186 case SRE_CATEGORY_NOT_SPACE:
187 return !SRE_IS_SPACE(ch);
188 case SRE_CATEGORY_WORD:
189 return SRE_IS_WORD(ch);
190 case SRE_CATEGORY_NOT_WORD:
191 return !SRE_IS_WORD(ch);
192 case SRE_CATEGORY_LINEBREAK:
193 return SRE_IS_LINEBREAK(ch);
194 case SRE_CATEGORY_NOT_LINEBREAK:
195 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000196
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000197 case SRE_CATEGORY_LOC_WORD:
198 return SRE_LOC_IS_WORD(ch);
199 case SRE_CATEGORY_LOC_NOT_WORD:
200 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000201
202#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000203 case SRE_CATEGORY_UNI_DIGIT:
204 return SRE_UNI_IS_DIGIT(ch);
205 case SRE_CATEGORY_UNI_NOT_DIGIT:
206 return !SRE_UNI_IS_DIGIT(ch);
207 case SRE_CATEGORY_UNI_SPACE:
208 return SRE_UNI_IS_SPACE(ch);
209 case SRE_CATEGORY_UNI_NOT_SPACE:
210 return !SRE_UNI_IS_SPACE(ch);
211 case SRE_CATEGORY_UNI_WORD:
212 return SRE_UNI_IS_WORD(ch);
213 case SRE_CATEGORY_UNI_NOT_WORD:
214 return !SRE_UNI_IS_WORD(ch);
215 case SRE_CATEGORY_UNI_LINEBREAK:
216 return SRE_UNI_IS_LINEBREAK(ch);
217 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
218 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000219#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000220 }
221 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000222}
223
224/* helpers */
225
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000226static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000227mark_fini(SRE_STATE* state)
228{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000229 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000230 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000231 state->mark_stack = NULL;
232 }
233 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000234}
235
236static int
237mark_save(SRE_STATE* state, int lo, int hi)
238{
239 void* stack;
240 int size;
241 int minsize, newsize;
242
243 if (hi <= lo)
244 return 0;
245
246 size = (hi - lo) + 1;
247
248 newsize = state->mark_stack_size;
249 minsize = state->mark_stack_base + size;
250
251 if (newsize < minsize) {
252 /* create new stack */
253 if (!newsize) {
254 newsize = 512;
255 if (newsize < minsize)
256 newsize = minsize;
257 TRACE(("allocate stack %d\n", newsize));
258 stack = malloc(sizeof(void*) * newsize);
259 } else {
260 /* grow the stack */
261 while (newsize < minsize)
262 newsize += newsize;
263 TRACE(("grow stack to %d\n", newsize));
264 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
265 }
266 if (!stack) {
267 mark_fini(state);
268 return SRE_ERROR_MEMORY;
269 }
270 state->mark_stack = stack;
271 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000272 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000273
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000274 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000275
276 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
277 size * sizeof(void*));
278
279 state->mark_stack_base += size;
280
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000281 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000282}
283
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000284static int
285mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000286{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000287 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000288
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000289 if (hi <= lo)
290 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000291
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000292 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000293
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000294 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000295
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000296 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000297
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000298 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
299 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000300
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000301 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000302}
303
/* generate 8-bit version */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

/* include this file into itself with SRE_RECURSIVE defined, so the
   engine section is compiled once with the 8-bit names set above */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif

339#endif /* SRE_RECURSIVE */
340
341/* -------------------------------------------------------------------- */
342/* String matching engine */
343
344/* the following section is compiled twice, with different character
345 settings */
346
347LOCAL(int)
348SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
349{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000350 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000352 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000353
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000354 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000355
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000356 case SRE_AT_BEGINNING:
357 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000358
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000359 case SRE_AT_BEGINNING_LINE:
360 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000361 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000362
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000363 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000364 return (((void*) (ptr+1) == state->end &&
365 SRE_IS_LINEBREAK((int) ptr[0])) ||
366 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000367
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000368 case SRE_AT_END_LINE:
369 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000370 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000371
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000372 case SRE_AT_BOUNDARY:
373 if (state->beginning == state->end)
374 return 0;
375 that = ((void*) ptr > state->beginning) ?
376 SRE_IS_WORD((int) ptr[-1]) : 0;
377 this = ((void*) ptr < state->end) ?
378 SRE_IS_WORD((int) ptr[0]) : 0;
379 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000380
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000381 case SRE_AT_NON_BOUNDARY:
382 if (state->beginning == state->end)
383 return 0;
384 that = ((void*) ptr > state->beginning) ?
385 SRE_IS_WORD((int) ptr[-1]) : 0;
386 this = ((void*) ptr < state->end) ?
387 SRE_IS_WORD((int) ptr[0]) : 0;
388 return this == that;
389 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000390
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000391 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000392}
393
394LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000395SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000396{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000400
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 for (;;) {
402 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000403
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000405 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000406 if (ch == set[0])
407 return ok;
408 set++;
409 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000412 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 if (set[0] <= ch && ch <= set[1])
414 return ok;
415 set += 2;
416 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000417
Fredrik Lundh3562f112000-07-02 12:00:07 +0000418 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000419 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000420 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
421 return ok;
422 set += 16;
423 break;
424
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000425 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000426 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000427 if (sre_category(set[0], (int) ch))
428 return ok;
429 set += 1;
430 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000431
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000432 case SRE_OP_NEGATE:
433 ok = !ok;
434 break;
435
436 case SRE_OP_FAILURE:
437 return !ok;
438
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000439 default:
440 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000441 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000442 return 0;
443 }
444 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000445}
446
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000447LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
448
449LOCAL(int)
450SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
451{
452 SRE_CODE chr;
453 SRE_CHAR* ptr = state->ptr;
454 SRE_CHAR* end = state->end;
455 int i;
456
457 /* adjust end */
458 if (maxcount < end - ptr && maxcount != 65535)
459 end = ptr + maxcount;
460
461 switch (pattern[0]) {
462
463 case SRE_OP_ANY:
464 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000465 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000466 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
467 ptr++;
468 break;
469
470 case SRE_OP_ANY_ALL:
471 /* repeated dot wildcare. skip to the end of the target
472 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000473 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000474 ptr = end;
475 break;
476
477 case SRE_OP_LITERAL:
478 /* repeated literal */
479 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000480 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000481 while (ptr < end && (SRE_CODE) *ptr == chr)
482 ptr++;
483 break;
484
485 case SRE_OP_LITERAL_IGNORE:
486 /* repeated literal */
487 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000488 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000489 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
490 ptr++;
491 break;
492
493 case SRE_OP_NOT_LITERAL:
494 /* repeated non-literal */
495 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000496 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497 while (ptr < end && (SRE_CODE) *ptr != chr)
498 ptr++;
499 break;
500
501 case SRE_OP_NOT_LITERAL_IGNORE:
502 /* repeated non-literal */
503 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000504 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000505 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
506 ptr++;
507 break;
508
509 case SRE_OP_IN:
510 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000511 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
512 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000513 ptr++;
514 break;
515
516 default:
517 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000518 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000519 while ((SRE_CHAR*) state->ptr < end) {
520 i = SRE_MATCH(state, pattern, level);
521 if (i < 0)
522 return i;
523 if (!i)
524 break;
525 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000526 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
527 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000528 return (SRE_CHAR*) state->ptr - ptr;
529 }
530
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000531 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000532 return ptr - (SRE_CHAR*) state->ptr;
533}
534
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        /* NOTE(review): ptr[i] is read without comparing against `end`;
           this relies on the minimal-length check above covering the
           prefix length -- confirm before enabling this code */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif

563LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000564SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000565{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000566 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000567 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000568
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000569 SRE_CHAR* end = state->end;
570 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000571 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000572 SRE_REPEAT* rp;
573 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000574 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000575
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000576 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000577
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000578 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000579
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000580#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000581 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000582 return SRE_ERROR_RECURSION_LIMIT;
583#endif
584
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000585#if defined(USE_RECURSION_LIMIT)
586 if (level > USE_RECURSION_LIMIT)
587 return SRE_ERROR_RECURSION_LIMIT;
588#endif
589
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000590 if (pattern[0] == SRE_OP_INFO) {
591 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000592 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000593 if (pattern[3] && (end - ptr) < pattern[3]) {
594 TRACE(("reject (got %d chars, need %d)\n",
595 (end - ptr), pattern[3]));
596 return 0;
597 }
598 pattern += pattern[1] + 1;
599 }
600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000601 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000602
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000603 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000605 case SRE_OP_FAILURE:
606 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000607 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000608 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000609
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000610 case SRE_OP_SUCCESS:
611 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000614 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000615
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000616 case SRE_OP_AT:
617 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000618 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000619 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000620 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000621 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000622 pattern++;
623 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000624
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000625 case SRE_OP_CATEGORY:
626 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000627 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000628 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000629 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000630 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000631 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000632 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000633 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000634
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000635 case SRE_OP_LITERAL:
636 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000637 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000638 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000639 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000640 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000641 pattern++;
642 ptr++;
643 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000644
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000645 case SRE_OP_NOT_LITERAL:
646 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000647 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000648 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000649 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000650 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000651 pattern++;
652 ptr++;
653 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000654
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000655 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000656 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000657 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000658 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000659 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
660 return 0;
661 ptr++;
662 break;
663
664 case SRE_OP_ANY_ALL:
665 /* match anything */
666 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000667 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000668 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000669 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000670 ptr++;
671 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000672
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000673 case SRE_OP_IN:
674 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000675 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000676 TRACE(("|%p|%p|IN\n", pattern, ptr));
677 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000678 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000679 pattern += pattern[0];
680 ptr++;
681 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000682
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000684 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000686 i = pattern[0];
687 {
688 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
689 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
690 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000691 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000692 while (p < e) {
693 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000694 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 p++; ptr++;
696 }
697 }
698 pattern++;
699 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000700
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000701 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000702 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000703 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000704 i = pattern[0];
705 {
706 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
707 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
708 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000709 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710 while (p < e) {
711 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000712 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000713 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000714 p++; ptr++;
715 }
716 }
717 pattern++;
718 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000719
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000720 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000721 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000722 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000723 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000724 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 pattern++;
726 ptr++;
727 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000728
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000730 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000731 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000732 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000733 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 pattern++;
735 ptr++;
736 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000737
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000739 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000741 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000742 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 pattern += pattern[0];
744 ptr++;
745 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000746
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000747 case SRE_OP_MARK:
748 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000749 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000750 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000751 i = pattern[0];
752 if (i & 1)
753 state->lastindex = i/2 + 1;
754 if (i > state->lastmark)
755 state->lastmark = i;
756 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 pattern++;
758 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000759
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000760 case SRE_OP_JUMP:
761 case SRE_OP_INFO:
762 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000763 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000764 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000765 pattern += pattern[0];
766 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000767
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 case SRE_OP_ASSERT:
769 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000770 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000771 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000772 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000773 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000774 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000775 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000776 if (i <= 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000777 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000778 pattern += pattern[0];
779 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000780
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000781 case SRE_OP_ASSERT_NOT:
782 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000783 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000784 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000785 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000786 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000787 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000788 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000789 if (i < 0)
790 return i;
791 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000792 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 pattern += pattern[0];
794 break;
795
796 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000797 /* alternation */
798 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000799 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000800 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000801 for (; pattern[0]; pattern += pattern[0]) {
802 if (pattern[1] == SRE_OP_LITERAL &&
803 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
804 continue;
805 if (pattern[1] == SRE_OP_IN &&
806 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
807 continue;
808 state->ptr = ptr;
809 i = SRE_MATCH(state, pattern + 1, level + 1);
810 if (i)
811 return i;
812 if (state->lastmark > lastmark) {
813 memset(
814 state->mark + lastmark + 1, 0,
815 (state->lastmark - lastmark) * sizeof(void*)
816 );
817 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000818 }
819 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000820 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000821
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000822 case SRE_OP_REPEAT_ONE:
823 /* match repeated sequence (maximizing regexp) */
824
825 /* this operator only works if the repeated item is
826 exactly one character wide, and we're not already
827 collecting backtracking points. for other cases,
828 use the MAX_REPEAT operator instead */
829
830 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
831
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000832 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000833 pattern[1], pattern[2]));
834
Fredrik Lundhe1869832000-08-01 22:47:49 +0000835 if (ptr + pattern[1] > end)
836 return 0; /* cannot match */
837
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000838 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000839
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000840 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
841 if (count < 0)
842 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000843
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000844 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000845
846 /* when we arrive here, count contains the number of
847 matches, and ptr points to the tail of the target
848 string. check if the rest of the pattern matches,
849 and backtrack if not. */
850
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000851 if (count < (int) pattern[1])
852 return 0;
853
854 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
855 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000856 state->ptr = ptr;
857 return 1;
858
859 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
860 /* tail starts with a literal. skip positions where
861 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000862 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000863 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000864 while (count >= (int) pattern[1] &&
865 (ptr >= end || *ptr != chr)) {
866 ptr--;
867 count--;
868 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000869 if (count < (int) pattern[1])
870 break;
871 state->ptr = ptr;
872 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000873 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000874 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000875 ptr--;
876 count--;
877 }
878
879 } else {
880 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000881 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000882 while (count >= (int) pattern[1]) {
883 state->ptr = ptr;
884 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000885 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000886 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000887 ptr--;
888 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000889 if (state->lastmark > lastmark) {
890 memset(
891 state->mark + lastmark + 1, 0,
892 (state->lastmark - lastmark) * sizeof(void*)
893 );
894 state->lastmark = lastmark;
895 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000896 }
897 }
898 return 0;
899
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000900 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000901 /* create repeat context. all the hard work is done
902 by the UNTIL operator */
903 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000904 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000905 pattern[1], pattern[2]));
906
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000907 rep.count = -1;
908 rep.pattern = pattern;
909
910 /* install new repeat context */
911 rep.prev = state->repeat;
912 state->repeat = &rep;
913
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000914 state->ptr = ptr;
915 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000916
917 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000918
919 return i;
920
921 case SRE_OP_MAX_UNTIL:
922 /* maximizing repeat */
923 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
924
925 /* FIXME: we probably need to deal with zero-width
926 matches in here... */
927
928 rp = state->repeat;
929 if (!rp)
930 return SRE_ERROR_STATE;
931
932 state->ptr = ptr;
933
934 count = rp->count + 1;
935
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000936 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000937
938 if (count < rp->pattern[1]) {
939 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000940 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000941 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000942 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000943 if (i)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000944 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000945 rp->count = count - 1;
946 state->ptr = ptr;
947 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000948 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000949
950 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000951 /* we may have enough matches, but if we can
952 match another item, do so */
953 rp->count = count;
954 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000955 i = mark_save(state, 0, lastmark);
956 if (i < 0)
957 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000958 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000959 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000960 if (i)
961 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +0000962 i = mark_restore(state, 0, lastmark);
963 if (i < 0)
964 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000965 rp->count = count - 1;
966 state->ptr = ptr;
967 }
968
969 /* cannot match more repeated items here. make sure the
970 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000971 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000972 i = SRE_MATCH(state, pattern, level + 1);
973 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000974 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000975 state->repeat = rp;
976 return 0;
977
978 case SRE_OP_MIN_UNTIL:
979 /* minimizing repeat */
980 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
981
982 rp = state->repeat;
983 if (!rp)
984 return SRE_ERROR_STATE;
985
986 count = rp->count + 1;
987
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000988 TRACE(("|%p|%p|MIN_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000989
990 state->ptr = ptr;
991
992 if (count < rp->pattern[1]) {
993 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000994 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000995 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000996 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000997 if (i)
998 return i;
999 rp->count = count-1;
1000 state->ptr = ptr;
1001 return 0;
1002 }
1003
1004 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001005 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001006 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001007 if (i) {
1008 /* free(rp); */
1009 return i;
1010 }
1011 state->repeat = rp;
1012
1013 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1014 return 0;
1015
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001016 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001017 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001018 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001019 if (i)
1020 return i;
1021 rp->count = count - 1;
1022 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001023
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001024 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001025 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001026 return SRE_ERROR_ILLEGAL;
1027 }
1028 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001029
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001030 /* shouldn't end up here */
1031 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001032}
1033
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001034LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001035SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1036{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001037 SRE_CHAR* ptr = state->start;
1038 SRE_CHAR* end = state->end;
1039 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001040 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001041 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001042 SRE_CODE* prefix = NULL;
1043 SRE_CODE* charset = NULL;
1044 SRE_CODE* overlap = NULL;
1045 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001046
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001047 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001048 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001049 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001050
1051 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001052
1053 if (pattern[3] > 0) {
1054 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001055 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001056 end -= pattern[3]-1;
1057 if (end <= ptr)
1058 end = ptr+1;
1059 }
1060
Fredrik Lundh3562f112000-07-02 12:00:07 +00001061 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001062 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001063 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001064 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001065 prefix_skip = pattern[6];
1066 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001067 overlap = prefix + prefix_len - 1;
1068 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001069 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001070 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001071 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001072
1073 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001074 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001075
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001076 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1077 TRACE(("charset = %p\n", charset));
1078
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001079#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001080 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001081 /* pattern starts with a known prefix. use the overlap
1082 table to skip forward as fast as we possibly can */
1083 int i = 0;
1084 end = state->end;
1085 while (ptr < end) {
1086 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001087 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001088 if (!i)
1089 break;
1090 else
1091 i = overlap[i];
1092 } else {
1093 if (++i == prefix_len) {
1094 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001095 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1096 state->start = ptr + 1 - prefix_len;
1097 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001098 if (flags & SRE_INFO_LITERAL)
1099 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001100 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001101 if (status != 0)
1102 return status;
1103 /* close but no cigar -- try again */
1104 i = overlap[i];
1105 }
1106 break;
1107 }
1108
1109 }
1110 ptr++;
1111 }
1112 return 0;
1113 }
1114#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001115
Fredrik Lundh3562f112000-07-02 12:00:07 +00001116 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001117 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001118 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001119 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001120 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001121 for (;;) {
1122 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1123 ptr++;
1124 if (ptr == end)
1125 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001126 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001127 state->start = ptr;
1128 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001129 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001130 if (status != 0)
1131 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001132 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001133 } else if (charset) {
1134 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001135 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001136 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001137 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001138 ptr++;
1139 if (ptr == end)
1140 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001141 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001142 state->start = ptr;
1143 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001144 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001145 if (status != 0)
1146 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001147 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001148 }
1149 } else
1150 /* general case */
1151 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001152 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001153 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001154 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001155 if (status != 0)
1156 break;
1157 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001159 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001160}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001161
Guido van Rossumb700df92000-03-31 14:59:30 +00001162
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001163#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001164
1165/* -------------------------------------------------------------------- */
1166/* factories and destructors */
1167
1168/* see sre.h for object declarations */
1169
1170staticforward PyTypeObject Pattern_Type;
1171staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001172staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001173
1174static PyObject *
1175_compile(PyObject* self_, PyObject* args)
1176{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001177 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001179 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001180 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001182 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001183 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001184 PyObject* code;
1185 int groups = 0;
1186 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001187 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001188 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001189 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001190 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001191
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001192 code = PySequence_Fast(code, "code argument must be a sequence");
1193 if (!code)
1194 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001195
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001196#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001197 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001198#else
1199 n = PySequence_Length(code);
1200#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001201
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001202 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1203 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001204 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001205 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001206 }
1207
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001208 for (i = 0; i < n; i++) {
1209 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001210 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001211 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001212
1213 Py_DECREF(code);
1214
1215 if (PyErr_Occurred())
1216 return NULL;
1217
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001218 Py_INCREF(pattern);
1219 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001220
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001221 self->flags = flags;
1222
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001223 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001224
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001225 Py_XINCREF(groupindex);
1226 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001227
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001228 Py_XINCREF(indexgroup);
1229 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001230
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001231 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001232}
1233
1234static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001235sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001236{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001237 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001238}
1239
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001240static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001241sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001242{
1243 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001244 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001245 return NULL;
1246 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001247 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001248#if defined(HAVE_UNICODE)
1249 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001250 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001251#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001252 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001253}
1254
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001255LOCAL(void)
1256state_reset(SRE_STATE* state)
1257{
1258 int i;
1259
1260 state->lastmark = 0;
1261
1262 /* FIXME: dynamic! */
1263 for (i = 0; i < SRE_MARK_SIZE; i++)
1264 state->mark[i] = NULL;
1265
1266 state->lastindex = -1;
1267
1268 state->repeat = NULL;
1269
1270 mark_fini(state);
1271}
1272
Guido van Rossumb700df92000-03-31 14:59:30 +00001273LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001274state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1275 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001276{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001277 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001278
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001279 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001280 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001281 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001282
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001283 memset(state, 0, sizeof(SRE_STATE));
1284
1285 state->lastindex = -1;
1286
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001287#if defined(HAVE_UNICODE)
1288 if (PyUnicode_Check(string)) {
1289 /* unicode strings doesn't always support the buffer interface */
1290 ptr = (void*) PyUnicode_AS_DATA(string);
1291 bytes = PyUnicode_GET_DATA_SIZE(string);
1292 size = PyUnicode_GET_SIZE(string);
1293 state->charsize = sizeof(Py_UNICODE);
1294
1295 } else {
1296#endif
1297
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001298 /* get pointer to string buffer */
1299 buffer = string->ob_type->tp_as_buffer;
1300 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1301 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001302 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001303 return NULL;
1304 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001305
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001306 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001307 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1308 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001309 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1310 return NULL;
1311 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001312
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001313 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001314#if PY_VERSION_HEX >= 0x01060000
1315 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001316#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001317 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001318#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001319
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001320 if (PyString_Check(string) || bytes == size)
1321 state->charsize = 1;
1322#if defined(HAVE_UNICODE)
1323 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1324 state->charsize = sizeof(Py_UNICODE);
1325#endif
1326 else {
1327 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1328 return NULL;
1329 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001330
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001331#if defined(HAVE_UNICODE)
1332 }
1333#endif
1334
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001335 /* adjust boundaries */
1336 if (start < 0)
1337 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001338 else if (start > size)
1339 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001340
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001341 if (end < 0)
1342 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001343 else if (end > size)
1344 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001345
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001346 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001347
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001348 state->start = (void*) ((char*) ptr + start * state->charsize);
1349 state->end = (void*) ((char*) ptr + end * state->charsize);
1350
1351 Py_INCREF(string);
1352 state->string = string;
1353 state->pos = start;
1354 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001355
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001356 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001357 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001358#if defined(HAVE_UNICODE)
1359 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001360 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001361#endif
1362 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001363 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001364
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001365 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001366}
1367
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001368LOCAL(void)
1369state_fini(SRE_STATE* state)
1370{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001371 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001372 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001373}
1374
1375LOCAL(PyObject*)
1376state_getslice(SRE_STATE* state, int index, PyObject* string)
1377{
Fredrik Lundh58100642000-08-09 09:14:35 +00001378 int i, j;
1379
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001380 index = (index - 1) * 2;
1381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001382 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh58100642000-08-09 09:14:35 +00001383 i = j = 0;
1384 } else {
1385 i = ((char*)state->mark[index] - (char*)state->beginning) /
1386 state->charsize;
1387 j = ((char*)state->mark[index+1] - (char*)state->beginning) /
1388 state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001389 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001390
Fredrik Lundh58100642000-08-09 09:14:35 +00001391 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001392}
1393
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001394static void
1395pattern_error(int status)
1396{
1397 switch (status) {
1398 case SRE_ERROR_RECURSION_LIMIT:
1399 PyErr_SetString(
1400 PyExc_RuntimeError,
1401 "maximum recursion limit exceeded"
1402 );
1403 break;
1404 case SRE_ERROR_MEMORY:
1405 PyErr_NoMemory();
1406 break;
1407 default:
1408 /* other error codes indicate compiler/engine bugs */
1409 PyErr_SetString(
1410 PyExc_RuntimeError,
1411 "internal error in regular expression engine"
1412 );
1413 }
1414}
1415
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001416static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001417pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001418{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001419 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001420
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001421 MatchObject* match;
1422 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001423 char* base;
1424 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001425
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001426 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001427
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001428 /* create match object (with room for extra group marks) */
1429 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001430 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001431 if (!match)
1432 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001433
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001434 Py_INCREF(pattern);
1435 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001436
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001437 Py_INCREF(state->string);
1438 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001439
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001440 match->regs = NULL;
1441 match->groups = pattern->groups+1;
1442
1443 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001444
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001445 base = (char*) state->beginning;
1446 n = state->charsize;
1447
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001448 match->mark[0] = ((char*) state->start - base) / n;
1449 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001450
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001451 for (i = j = 0; i < pattern->groups; i++, j+=2)
1452 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1453 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1454 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1455 } else
1456 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1457
1458 match->pos = state->pos;
1459 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001460
Fredrik Lundh6f013982000-07-03 18:44:21 +00001461 match->lastindex = state->lastindex;
1462
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001463 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001464
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001465 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001466
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001467 /* no match */
1468 Py_INCREF(Py_None);
1469 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001470
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001471 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001472
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001473 /* internal error */
1474 pattern_error(status);
1475 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001476}
1477
1478static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001479pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001480{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001481 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001482
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001483 ScannerObject* self;
1484
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001485 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001486 int start = 0;
1487 int end = INT_MAX;
1488 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1489 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001490
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001491 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001492 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001493 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001494 return NULL;
1495
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001496 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001497 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001498 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001499 return NULL;
1500 }
1501
1502 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001503 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001504
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001505 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001506}
1507
Guido van Rossumb700df92000-03-31 14:59:30 +00001508static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001509pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001510{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001511 Py_XDECREF(self->pattern);
1512 Py_XDECREF(self->groupindex);
1513 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001514}
1515
1516static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001517pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001518{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001519 SRE_STATE state;
1520 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001521
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001522 PyObject* string;
1523 int start = 0;
1524 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001525 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1526 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1527 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001528 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001529
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001530 string = state_init(&state, self, string, start, end);
1531 if (!string)
1532 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001533
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001534 state.ptr = state.start;
1535
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001536 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1537
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001538 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001539 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001540 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001541#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001542 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001543#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001544 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001545
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001546 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1547
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001548 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001549
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001550 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001551}
1552
1553static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001554pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001555{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001556 SRE_STATE state;
1557 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001558
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001559 PyObject* string;
1560 int start = 0;
1561 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001562 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1563 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1564 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001565 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001566
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001567 string = state_init(&state, self, string, start, end);
1568 if (!string)
1569 return NULL;
1570
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001571 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1572
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001573 if (state.charsize == 1) {
1574 status = sre_search(&state, PatternObject_GetCode(self));
1575 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001576#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001577 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001578#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001579 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001580
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001581 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1582
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001583 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001585 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001586}
1587
1588static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001589call(char* function, PyObject* args)
1590{
1591 PyObject* name;
1592 PyObject* module;
1593 PyObject* func;
1594 PyObject* result;
1595
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001596 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001597 if (!name)
1598 return NULL;
1599 module = PyImport_Import(name);
1600 Py_DECREF(name);
1601 if (!module)
1602 return NULL;
1603 func = PyObject_GetAttrString(module, function);
1604 Py_DECREF(module);
1605 if (!func)
1606 return NULL;
1607 result = PyObject_CallObject(func, args);
1608 Py_DECREF(func);
1609 Py_DECREF(args);
1610 return result;
1611}
1612
1613static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001614pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001615{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001616 PyObject* template;
1617 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001618 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001619 static char* kwlist[] = { "repl", "string", "count", NULL };
1620 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
1621 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001622 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001623
1624 /* delegate to Python code */
1625 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1626}
1627
1628static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001629pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001630{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001631 PyObject* template;
1632 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001633 PyObject* count = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001634 static char* kwlist[] = { "repl", "string", "count", NULL };
1635 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
1636 &template, &string, &count))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001637 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001638
1639 /* delegate to Python code */
1640 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1641}
1642
1643static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001644pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001645{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001646 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001647 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001648 static char* kwlist[] = { "source", "maxsplit", NULL };
1649 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
1650 &string, &maxsplit))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001651 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001652
1653 /* delegate to Python code */
1654 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1655}
1656
1657static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001658pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001659{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001660 SRE_STATE state;
1661 PyObject* list;
1662 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001663 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001664
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001665 PyObject* string;
1666 int start = 0;
1667 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001668 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1669 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1670 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001671 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001672
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001673 string = state_init(&state, self, string, start, end);
1674 if (!string)
1675 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001676
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001677 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001678
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001679 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001680
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001681 PyObject* item;
1682
1683 state.ptr = state.start;
1684
1685 if (state.charsize == 1) {
1686 status = sre_search(&state, PatternObject_GetCode(self));
1687 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001688#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001689 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001690#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001691 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001692
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001693 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001694
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001695 /* don't bother to build a match object */
1696 switch (self->groups) {
1697 case 0:
1698 item = PySequence_GetSlice(
1699 string,
1700 ((char*) state.start - (char*) state.beginning) /
1701 state.charsize,
1702 ((char*) state.ptr - (char*) state.beginning) /
1703 state.charsize);
1704 if (!item)
1705 goto error;
1706 break;
1707 case 1:
1708 item = state_getslice(&state, 1, string);
1709 if (!item)
1710 goto error;
1711 break;
1712 default:
1713 item = PyTuple_New(self->groups);
1714 if (!item)
1715 goto error;
1716 for (i = 0; i < self->groups; i++) {
1717 PyObject* o = state_getslice(&state, i+1, string);
1718 if (!o) {
1719 Py_DECREF(item);
1720 goto error;
1721 }
1722 PyTuple_SET_ITEM(item, i, o);
1723 }
1724 break;
1725 }
1726
Fredrik Lundhe67d8e52000-08-27 21:32:46 +00001727 status = PyList_Append(list, item);
1728 Py_DECREF(item);
1729
1730 if (status < 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001731 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001732
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001733 if (state.ptr == state.start)
1734 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001735 else
1736 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001737
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001738 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001739
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001740 if (status == 0)
1741 break;
1742
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001743 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001744 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001745
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001746 }
1747 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001749 state_fini(&state);
1750 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001751
1752error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001753 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001754 state_fini(&state);
1755 return NULL;
1756
Guido van Rossumb700df92000-03-31 14:59:30 +00001757}
1758
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001759static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00001760 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
1761 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
1762 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
1763 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
1764 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
1765 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001766 /* experimental */
Fredrik Lundh562586e2000-10-03 20:43:34 +00001767 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001768 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001769};
1770
1771static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001772pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001773{
1774 PyObject* res;
1775
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001776 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001777
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001778 if (res)
1779 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001780
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001781 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001782
1783 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001784 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001785 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001786 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001787 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001788
1789 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001790 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001791
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001792 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001793 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001794
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001795 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001796 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001797 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001798 }
1799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001800 PyErr_SetString(PyExc_AttributeError, name);
1801 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001802}
1803
1804statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001805 PyObject_HEAD_INIT(NULL)
1806 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001807 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001808 (destructor)pattern_dealloc, /*tp_dealloc*/
1809 0, /*tp_print*/
1810 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001811};
1812
1813/* -------------------------------------------------------------------- */
1814/* match methods */
1815
1816static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001817match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001818{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001819 Py_XDECREF(self->regs);
1820 Py_XDECREF(self->string);
1821 Py_DECREF(self->pattern);
1822 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001823}
1824
1825static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001826match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001827{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001828 if (index < 0 || index >= self->groups) {
1829 /* raise IndexError if we were given a bad group number */
1830 PyErr_SetString(
1831 PyExc_IndexError,
1832 "no such group"
1833 );
1834 return NULL;
1835 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001836
Fredrik Lundh6f013982000-07-03 18:44:21 +00001837 index *= 2;
1838
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001839 if (self->string == Py_None || self->mark[index] < 0) {
1840 /* return default value if the string or group is undefined */
1841 Py_INCREF(def);
1842 return def;
1843 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001845 return PySequence_GetSlice(
1846 self->string, self->mark[index], self->mark[index+1]
1847 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001848}
1849
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001850static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001851match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001852{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001853 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001854
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001855 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001856 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001857
Fredrik Lundh6f013982000-07-03 18:44:21 +00001858 i = -1;
1859
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001860 if (self->pattern->groupindex) {
1861 index = PyObject_GetItem(self->pattern->groupindex, index);
1862 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001863 if (PyInt_Check(index))
1864 i = (int) PyInt_AS_LONG(index);
1865 Py_DECREF(index);
1866 } else
1867 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001868 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001869
1870 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001871}
1872
1873static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001874match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001875{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001876 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001877}
1878
1879static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001880match_expand(MatchObject* self, PyObject* args)
1881{
1882 PyObject* template;
1883 if (!PyArg_ParseTuple(args, "O:expand", &template))
1884 return NULL;
1885
1886 /* delegate to Python code */
1887 return call(
1888 "_expand",
1889 Py_BuildValue("OOO", self->pattern, self, template)
1890 );
1891}
1892
1893static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001894match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001895{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001896 PyObject* result;
1897 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001898
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001899 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001900
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001901 switch (size) {
1902 case 0:
1903 result = match_getslice(self, Py_False, Py_None);
1904 break;
1905 case 1:
1906 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1907 break;
1908 default:
1909 /* fetch multiple items */
1910 result = PyTuple_New(size);
1911 if (!result)
1912 return NULL;
1913 for (i = 0; i < size; i++) {
1914 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001915 self, PyTuple_GET_ITEM(args, i), Py_None
1916 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001917 if (!item) {
1918 Py_DECREF(result);
1919 return NULL;
1920 }
1921 PyTuple_SET_ITEM(result, i, item);
1922 }
1923 break;
1924 }
1925 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001926}
1927
1928static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001929match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001930{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001931 PyObject* result;
1932 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001935 static char* kwlist[] = { "default", NULL };
1936 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001937 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001938
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001939 result = PyTuple_New(self->groups-1);
1940 if (!result)
1941 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001942
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001943 for (index = 1; index < self->groups; index++) {
1944 PyObject* item;
1945 item = match_getslice_by_index(self, index, def);
1946 if (!item) {
1947 Py_DECREF(result);
1948 return NULL;
1949 }
1950 PyTuple_SET_ITEM(result, index-1, item);
1951 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001952
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001953 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001954}
1955
1956static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001957match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001958{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001959 PyObject* result;
1960 PyObject* keys;
1961 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001962
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001963 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001964 static char* kwlist[] = { "default", NULL };
1965 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001966 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001967
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001968 result = PyDict_New();
1969 if (!result || !self->pattern->groupindex)
1970 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001971
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001972 keys = PyMapping_Keys(self->pattern->groupindex);
1973 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001974 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001975 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001976 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001977
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001978 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1979 PyObject* key;
1980 PyObject* item;
1981 key = PyList_GET_ITEM(keys, index);
1982 if (!key) {
1983 Py_DECREF(keys);
1984 Py_DECREF(result);
1985 return NULL;
1986 }
1987 item = match_getslice(self, key, def);
1988 if (!item) {
1989 Py_DECREF(key);
1990 Py_DECREF(keys);
1991 Py_DECREF(result);
1992 return NULL;
1993 }
1994 /* FIXME: <fl> this can fail, right? */
1995 PyDict_SetItem(result, key, item);
1996 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001997
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001998 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001999
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002000 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002001}
2002
2003static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002004match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002005{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002006 int index;
2007
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002008 PyObject* index_ = Py_False; /* zero */
2009 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2010 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002011
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002012 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002013
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002014 if (index < 0 || index >= self->groups) {
2015 PyErr_SetString(
2016 PyExc_IndexError,
2017 "no such group"
2018 );
2019 return NULL;
2020 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002021
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002022 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002023 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002024}
2025
2026static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002027match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002028{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002029 int index;
2030
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002031 PyObject* index_ = Py_False; /* zero */
2032 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2033 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002034
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002035 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002036
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002037 if (index < 0 || index >= self->groups) {
2038 PyErr_SetString(
2039 PyExc_IndexError,
2040 "no such group"
2041 );
2042 return NULL;
2043 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002044
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002045 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002046 return Py_BuildValue("i", self->mark[index*2+1]);
2047}
2048
2049LOCAL(PyObject*)
2050_pair(int i1, int i2)
2051{
2052 PyObject* pair;
2053 PyObject* item;
2054
2055 pair = PyTuple_New(2);
2056 if (!pair)
2057 return NULL;
2058
2059 item = PyInt_FromLong(i1);
2060 if (!item)
2061 goto error;
2062 PyTuple_SET_ITEM(pair, 0, item);
2063
2064 item = PyInt_FromLong(i2);
2065 if (!item)
2066 goto error;
2067 PyTuple_SET_ITEM(pair, 1, item);
2068
2069 return pair;
2070
2071 error:
2072 Py_DECREF(pair);
2073 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002074}
2075
2076static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002077match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002078{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002079 int index;
2080
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002081 PyObject* index_ = Py_False; /* zero */
2082 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2083 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002084
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002085 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002086
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002087 if (index < 0 || index >= self->groups) {
2088 PyErr_SetString(
2089 PyExc_IndexError,
2090 "no such group"
2091 );
2092 return NULL;
2093 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002094
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002095 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002096 return _pair(self->mark[index*2], self->mark[index*2+1]);
2097}
2098
2099static PyObject*
2100match_regs(MatchObject* self)
2101{
2102 PyObject* regs;
2103 PyObject* item;
2104 int index;
2105
2106 regs = PyTuple_New(self->groups);
2107 if (!regs)
2108 return NULL;
2109
2110 for (index = 0; index < self->groups; index++) {
2111 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2112 if (!item) {
2113 Py_DECREF(regs);
2114 return NULL;
2115 }
2116 PyTuple_SET_ITEM(regs, index, item);
2117 }
2118
2119 Py_INCREF(regs);
2120 self->regs = regs;
2121
2122 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002123}
2124
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002125static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002126 {"group", (PyCFunction) match_group, METH_VARARGS},
2127 {"start", (PyCFunction) match_start, METH_VARARGS},
2128 {"end", (PyCFunction) match_end, METH_VARARGS},
2129 {"span", (PyCFunction) match_span, METH_VARARGS},
2130 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2131 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2132 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002133 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002134};
2135
2136static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002137match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002138{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002139 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002140
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2142 if (res)
2143 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002144
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002145 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002146
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002147 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002148 if (self->lastindex >= 0)
2149 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002150 Py_INCREF(Py_None);
2151 return Py_None;
2152 }
2153
2154 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002155 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002156 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002157 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002158 );
2159 if (result)
2160 return result;
2161 PyErr_Clear();
2162 }
2163 Py_INCREF(Py_None);
2164 return Py_None;
2165 }
2166
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002167 if (!strcmp(name, "string")) {
2168 if (self->string) {
2169 Py_INCREF(self->string);
2170 return self->string;
2171 } else {
2172 Py_INCREF(Py_None);
2173 return Py_None;
2174 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002175 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002176
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002177 if (!strcmp(name, "regs")) {
2178 if (self->regs) {
2179 Py_INCREF(self->regs);
2180 return self->regs;
2181 } else
2182 return match_regs(self);
2183 }
2184
2185 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002186 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002187 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002188 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002189
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002190 if (!strcmp(name, "pos"))
2191 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002192
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002193 if (!strcmp(name, "endpos"))
2194 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002195
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002196 PyErr_SetString(PyExc_AttributeError, name);
2197 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002198}
2199
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2202
2203statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002204 PyObject_HEAD_INIT(NULL)
2205 0, "SRE_Match",
2206 sizeof(MatchObject), sizeof(int),
2207 (destructor)match_dealloc, /*tp_dealloc*/
2208 0, /*tp_print*/
2209 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002210};
2211
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002212/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002213/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002214
2215static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002216scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002217{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002218 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002219 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002220 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002221}
2222
2223static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002224scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002225{
2226 SRE_STATE* state = &self->state;
2227 PyObject* match;
2228 int status;
2229
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002230 state_reset(state);
2231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002232 state->ptr = state->start;
2233
2234 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002235 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002236 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002237#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002238 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002239#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002240 }
2241
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002242 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002243 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002244
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002245 if (status == 0 || state->ptr == state->start)
2246 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002247 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002248 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002249
2250 return match;
2251}
2252
2253
2254static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002255scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002256{
2257 SRE_STATE* state = &self->state;
2258 PyObject* match;
2259 int status;
2260
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002261 state_reset(state);
2262
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002263 state->ptr = state->start;
2264
2265 if (state->charsize == 1) {
2266 status = sre_search(state, PatternObject_GetCode(self->pattern));
2267 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002268#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002269 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002270#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002271 }
2272
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002273 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002274 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002275
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002276 if (status == 0 || state->ptr == state->start)
2277 state->start = (void*) ((char*) state->ptr + state->charsize);
2278 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002279 state->start = state->ptr;
2280
2281 return match;
2282}
2283
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002284static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002285 {"match", (PyCFunction) scanner_match, 0},
2286 {"search", (PyCFunction) scanner_search, 0},
2287 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002288};
2289
2290static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002291scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002292{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002293 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002294
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002295 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2296 if (res)
2297 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002298
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002299 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002300
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002301 /* attributes */
2302 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002303 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002304 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002305 }
2306
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002307 PyErr_SetString(PyExc_AttributeError, name);
2308 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002309}
2310
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002311statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002312 PyObject_HEAD_INIT(NULL)
2313 0, "SRE_Scanner",
2314 sizeof(ScannerObject), 0,
2315 (destructor)scanner_dealloc, /*tp_dealloc*/
2316 0, /*tp_print*/
2317 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002318};
2319
Guido van Rossumb700df92000-03-31 14:59:30 +00002320static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002321 {"compile", _compile, 1},
2322 {"getcodesize", sre_codesize, 1},
2323 {"getlower", sre_getlower, 1},
2324 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002325};
2326
2327void
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002328#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002329__declspec(dllexport)
2330#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002331init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002332{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002333 /* Patch object types */
2334 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002335 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002336
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002337 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002338}
2339
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002340#endif /* !defined(SRE_RECURSIVE) */