blob: 8add74e4225b8e93341e64fd64050cf557273b18 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 99-10-24 fl created (based on existing template matcher code)
 * 00-03-06 fl first alpha, sort of (0.5)
 * 00-06-30 fl added fast search optimization (0.9.3)
 * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
 * 00-07-02 fl added charset optimizations, etc (0.9.5)
 * 00-07-03 fl store code in pattern object, lookbehind, etc
 * 00-07-08 fl added regs attribute
 * 00-07-21 fl reset lastindex in scanner methods (0.9.6)
 * 00-08-01 fl fixes for 1.6b1 (0.9.8)
 * 00-08-02 fl moved SRE_COUNT out of the match method
 * 00-08-03 fl added recursion limit
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

30#ifndef SRE_RECURSIVE
31
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000032char copyright[] = " SRE 0.9.8 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000033
34#include "Python.h"
35
36#include "sre.h"
37
Guido van Rossumb700df92000-03-31 14:59:30 +000038#if defined(HAVE_LIMITS_H)
39#include <limits.h>
40#else
41#define INT_MAX 2147483647
42#endif
43
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000044#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000045
Fredrik Lundh436c3d582000-06-29 08:58:44 +000046/* name of this module, minus the leading underscore */
47#define MODULE "sre"
48
Guido van Rossumb700df92000-03-31 14:59:30 +000049/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000050#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000053/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000054#define HAVE_UNICODE
55#endif
56
/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */
#if !defined(USE_STACKCHECK)
#define USE_RECURSION_LIMIT 10000
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local function returning "type", using the
   cheapest calling convention the compiler offers. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (parenthesized so they stay intact inside larger
   expressions) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE takes a parenthesized printf argument list: TRACE(("fmt", ...));
   it expands to nothing unless VERBOSE is defined. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* offset of ptr from the start of the target string, in SRE_CHAR units;
   requires a variable named "state" in scope */
#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)

/* -------------------------------------------------------------------- */
/* search engine state */

/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* per-character bitmask of the SRE_*_MASK flags above, indexed by
   character code 0..127 (16 entries per row) */
static char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lowercase mapping for character codes 0..127: 'A'..'Z' (65..90) map to
   'a'..'z' (97..122), every other code maps to itself (16 per row) */
static char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* Return the lowercase form of ch using the table above; codes of 128
   and up are returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? sre_char_lower[ch] : ch);
}

/* Table-driven predicates: each yields the matching mask bit from
   sre_char_info (non-zero if set) for codes below 128, and 0 for anything
   else.  Note that the argument is evaluated twice. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* locale-specific character predicates */

/* Return the lowercase form of ch as given by the C library's tolower()
   for codes below 256; larger codes are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? tolower((ch)) : ch);
}

/* <ctype.h>-based predicates; codes of 256 and up are never digits,
   spaces or alphanumerics.  Only '\n' counts as a line break, and a word
   character is an alphanumeric or an underscore. */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)
/* Return the lowercase form of ch as computed by Py_UNICODE_TOLOWER;
   ch is narrowed to Py_UNICODE before the lookup. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

/* thin wrappers around the Py_UNICODE_IS* macros; a word character is an
   alphanumeric or an underscore */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif

Guido van Rossumb700df92000-03-31 14:59:30 +0000169LOCAL(int)
170sre_category(SRE_CODE category, unsigned int ch)
171{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000172 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174 case SRE_CATEGORY_DIGIT:
175 return SRE_IS_DIGIT(ch);
176 case SRE_CATEGORY_NOT_DIGIT:
177 return !SRE_IS_DIGIT(ch);
178 case SRE_CATEGORY_SPACE:
179 return SRE_IS_SPACE(ch);
180 case SRE_CATEGORY_NOT_SPACE:
181 return !SRE_IS_SPACE(ch);
182 case SRE_CATEGORY_WORD:
183 return SRE_IS_WORD(ch);
184 case SRE_CATEGORY_NOT_WORD:
185 return !SRE_IS_WORD(ch);
186 case SRE_CATEGORY_LINEBREAK:
187 return SRE_IS_LINEBREAK(ch);
188 case SRE_CATEGORY_NOT_LINEBREAK:
189 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000190
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000191 case SRE_CATEGORY_LOC_WORD:
192 return SRE_LOC_IS_WORD(ch);
193 case SRE_CATEGORY_LOC_NOT_WORD:
194 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000195
196#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000197 case SRE_CATEGORY_UNI_DIGIT:
198 return SRE_UNI_IS_DIGIT(ch);
199 case SRE_CATEGORY_UNI_NOT_DIGIT:
200 return !SRE_UNI_IS_DIGIT(ch);
201 case SRE_CATEGORY_UNI_SPACE:
202 return SRE_UNI_IS_SPACE(ch);
203 case SRE_CATEGORY_UNI_NOT_SPACE:
204 return !SRE_UNI_IS_SPACE(ch);
205 case SRE_CATEGORY_UNI_WORD:
206 return SRE_UNI_IS_WORD(ch);
207 case SRE_CATEGORY_UNI_NOT_WORD:
208 return !SRE_UNI_IS_WORD(ch);
209 case SRE_CATEGORY_UNI_LINEBREAK:
210 return SRE_UNI_IS_LINEBREAK(ch);
211 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
212 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000213#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000214 }
215 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000216}
217
218/* helpers */
219
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000220static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000221mark_fini(SRE_STATE* state)
222{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000223 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000225 state->mark_stack = NULL;
226 }
227 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000228}
229
230static int
231mark_save(SRE_STATE* state, int lo, int hi)
232{
233 void* stack;
234 int size;
235 int minsize, newsize;
236
237 if (hi <= lo)
238 return 0;
239
240 size = (hi - lo) + 1;
241
242 newsize = state->mark_stack_size;
243 minsize = state->mark_stack_base + size;
244
245 if (newsize < minsize) {
246 /* create new stack */
247 if (!newsize) {
248 newsize = 512;
249 if (newsize < minsize)
250 newsize = minsize;
251 TRACE(("allocate stack %d\n", newsize));
252 stack = malloc(sizeof(void*) * newsize);
253 } else {
254 /* grow the stack */
255 while (newsize < minsize)
256 newsize += newsize;
257 TRACE(("grow stack to %d\n", newsize));
258 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
259 }
260 if (!stack) {
261 mark_fini(state);
262 return SRE_ERROR_MEMORY;
263 }
264 state->mark_stack = stack;
265 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000266 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000267
268 TRACE(("copy %d:%d to %d\n", lo, hi, state->mark_stack_base));
269
270 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
271 size * sizeof(void*));
272
273 state->mark_stack_base += size;
274
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000275 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000276}
277
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278static int
279mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000280{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000281 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000282
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000283 if (hi <= lo)
284 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000285
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000286 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000287
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000288 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000289
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000290 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000291
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000292 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
293 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000294
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000295 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000296}
297
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000298/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000299
300#define SRE_CHAR unsigned char
301#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000302#define SRE_COUNT sre_count
Guido van Rossumb700df92000-03-31 14:59:30 +0000303#define SRE_MEMBER sre_member
304#define SRE_MATCH sre_match
305#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000306
307#if defined(HAVE_UNICODE)
308
Guido van Rossumb700df92000-03-31 14:59:30 +0000309#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000310#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000311#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000312
Guido van Rossumb700df92000-03-31 14:59:30 +0000313#undef SRE_SEARCH
314#undef SRE_MATCH
315#undef SRE_MEMBER
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000316#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000317#undef SRE_AT
318#undef SRE_CHAR
319
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000320/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000321
322#define SRE_CHAR Py_UNICODE
323#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000324#define SRE_COUNT sre_ucount
Guido van Rossumb700df92000-03-31 14:59:30 +0000325#define SRE_MEMBER sre_umember
326#define SRE_MATCH sre_umatch
327#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000328#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000329
330#endif /* SRE_RECURSIVE */
331
332/* -------------------------------------------------------------------- */
333/* String matching engine */
334
335/* the following section is compiled twice, with different character
336 settings */
337
338LOCAL(int)
339SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
340{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000341 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000342
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000343 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000344
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000345 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000346
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000347 case SRE_AT_BEGINNING:
348 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000349
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000350 case SRE_AT_BEGINNING_LINE:
351 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000352 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000353
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000354 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000355 return (((void*) (ptr+1) == state->end &&
356 SRE_IS_LINEBREAK((int) ptr[0])) ||
357 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000358
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000359 case SRE_AT_END_LINE:
360 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000361 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000362
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000363 case SRE_AT_BOUNDARY:
364 if (state->beginning == state->end)
365 return 0;
366 that = ((void*) ptr > state->beginning) ?
367 SRE_IS_WORD((int) ptr[-1]) : 0;
368 this = ((void*) ptr < state->end) ?
369 SRE_IS_WORD((int) ptr[0]) : 0;
370 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000371
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000372 case SRE_AT_NON_BOUNDARY:
373 if (state->beginning == state->end)
374 return 0;
375 that = ((void*) ptr > state->beginning) ?
376 SRE_IS_WORD((int) ptr[-1]) : 0;
377 this = ((void*) ptr < state->end) ?
378 SRE_IS_WORD((int) ptr[0]) : 0;
379 return this == that;
380 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000382 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000383}
384
385LOCAL(int)
Fredrik Lundh0640e112000-06-30 13:55:15 +0000386SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000387{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000388 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000389
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000390 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000391
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000392 for (;;) {
393 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000396 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 if (ch == set[0])
398 return ok;
399 set++;
400 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000401
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000402 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000403 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000404 if (set[0] <= ch && ch <= set[1])
405 return ok;
406 set += 2;
407 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh3562f112000-07-02 12:00:07 +0000409 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000410 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000411 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
412 return ok;
413 set += 16;
414 break;
415
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000416 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000417 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000418 if (sre_category(set[0], (int) ch))
419 return ok;
420 set += 1;
421 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000422
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000423 case SRE_OP_NEGATE:
424 ok = !ok;
425 break;
426
427 case SRE_OP_FAILURE:
428 return !ok;
429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 default:
431 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000432 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 return 0;
434 }
435 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000436}
437
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000438LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
439
440LOCAL(int)
441SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
442{
443 SRE_CODE chr;
444 SRE_CHAR* ptr = state->ptr;
445 SRE_CHAR* end = state->end;
446 int i;
447
448 /* adjust end */
449 if (maxcount < end - ptr && maxcount != 65535)
450 end = ptr + maxcount;
451
452 switch (pattern[0]) {
453
454 case SRE_OP_ANY:
455 /* repeated dot wildcard. */
456 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
457 ptr++;
458 break;
459
460 case SRE_OP_ANY_ALL:
461 /* repeated dot wildcare. skip to the end of the target
462 string, and backtrack from there */
463 ptr = end;
464 break;
465
466 case SRE_OP_LITERAL:
467 /* repeated literal */
468 chr = pattern[1];
469 while (ptr < end && (SRE_CODE) *ptr == chr)
470 ptr++;
471 break;
472
473 case SRE_OP_LITERAL_IGNORE:
474 /* repeated literal */
475 chr = pattern[1];
476 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
477 ptr++;
478 break;
479
480 case SRE_OP_NOT_LITERAL:
481 /* repeated non-literal */
482 chr = pattern[1];
483 while (ptr < end && (SRE_CODE) *ptr != chr)
484 ptr++;
485 break;
486
487 case SRE_OP_NOT_LITERAL_IGNORE:
488 /* repeated non-literal */
489 chr = pattern[1];
490 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
491 ptr++;
492 break;
493
494 case SRE_OP_IN:
495 /* repeated set */
496 while (ptr < end && SRE_MEMBER(pattern + 2, *ptr))
497 ptr++;
498 break;
499
500 default:
501 /* repeated single character pattern */
502 while ((SRE_CHAR*) state->ptr < end) {
503 i = SRE_MATCH(state, pattern, level);
504 if (i < 0)
505 return i;
506 if (!i)
507 break;
508 }
509 return (SRE_CHAR*) state->ptr - ptr;
510 }
511
512 return ptr - (SRE_CHAR*) state->ptr;
513}
514
Guido van Rossumb700df92000-03-31 14:59:30 +0000515LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000516SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000517{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000518 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000519 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000520
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000521 SRE_CHAR* end = state->end;
522 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000523 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000524 SRE_REPEAT* rp;
525 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000526 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000527
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000528 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000530 TRACE(("%8d: enter %d\n", PTR(ptr), level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000531
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000532#if defined(USE_STACKCHECK)
533 if (level % 10 == 0 && PyOS_CheckStack()) {
534 return SRE_ERROR_RECURSION_LIMIT;
535#endif
536
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000537#if defined(USE_RECURSION_LIMIT)
538 if (level > USE_RECURSION_LIMIT)
539 return SRE_ERROR_RECURSION_LIMIT;
540#endif
541
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000542 if (pattern[0] == SRE_OP_INFO) {
543 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000544 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000545 if (pattern[3] && (end - ptr) < pattern[3]) {
546 TRACE(("reject (got %d chars, need %d)\n",
547 (end - ptr), pattern[3]));
548 return 0;
549 }
550 pattern += pattern[1] + 1;
551 }
552
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000553 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000554
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000555 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000556
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000557 case SRE_OP_FAILURE:
558 /* immediate failure */
559 TRACE(("%8d: failure\n", PTR(ptr)));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000560 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000561
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000562 case SRE_OP_SUCCESS:
563 /* end of pattern */
564 TRACE(("%8d: success\n", PTR(ptr)));
565 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000566 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000567
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000568 case SRE_OP_AT:
569 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000570 /* <AT> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000571 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
572 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000573 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000574 pattern++;
575 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000576
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000577 case SRE_OP_CATEGORY:
578 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000579 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000580 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000581 *ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000582 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000583 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000584 TRACE(("%8d: category ok\n", PTR(ptr)));
585 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000586 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000587 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000588
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000589 case SRE_OP_LITERAL:
590 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000591 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000592 TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
593 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000594 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000595 pattern++;
596 ptr++;
597 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000598
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000599 case SRE_OP_NOT_LITERAL:
600 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000601 /* <NOT_LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000602 TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
603 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000604 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000605 pattern++;
606 ptr++;
607 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000608
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000609 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000610 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000611 /* <ANY> */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000612 TRACE(("%8d: anything (except newline)\n", PTR(ptr)));
613 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
614 return 0;
615 ptr++;
616 break;
617
618 case SRE_OP_ANY_ALL:
619 /* match anything */
620 /* <ANY_ALL> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000621 TRACE(("%8d: anything\n", PTR(ptr)));
622 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000623 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000624 ptr++;
625 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000626
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000627 case SRE_OP_IN:
628 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000629 /* <IN> <skip> <set> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000630 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
631 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000632 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000633 pattern += pattern[0];
634 ptr++;
635 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000636
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000637 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000638 /* match backreference */
639 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
640 i = pattern[0];
641 {
642 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
643 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
644 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000645 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000646 while (p < e) {
647 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000648 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000649 p++; ptr++;
650 }
651 }
652 pattern++;
653 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000654
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000655 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000656 /* match backreference */
657 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
658 i = pattern[0];
659 {
660 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
661 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
662 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000663 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000664 while (p < e) {
665 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000666 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000667 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000668 p++; ptr++;
669 }
670 }
671 pattern++;
672 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000673
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674 case SRE_OP_LITERAL_IGNORE:
675 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
676 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000677 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000678 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000679 pattern++;
680 ptr++;
681 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000682
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000683 case SRE_OP_NOT_LITERAL_IGNORE:
684 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
685 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000686 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000687 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000688 pattern++;
689 ptr++;
690 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000691
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000692 case SRE_OP_IN_IGNORE:
693 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
694 if (ptr >= end
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000695 || !SRE_MEMBER(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000696 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000697 pattern += pattern[0];
698 ptr++;
699 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000700
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000701 case SRE_OP_MARK:
702 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000703 /* <MARK> <gid> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000704 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000705 i = pattern[0];
706 if (i & 1)
707 state->lastindex = i/2 + 1;
708 if (i > state->lastmark)
709 state->lastmark = i;
710 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000711 pattern++;
712 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000713
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000714 case SRE_OP_JUMP:
715 case SRE_OP_INFO:
716 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000717 /* <JUMP> <offset> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000718 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
719 pattern += pattern[0];
720 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000721
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000722 case SRE_OP_ASSERT:
723 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000724 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 TRACE(("%8d: assert subpattern %d\n", PTR(ptr), pattern[1]));
726 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000727 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000728 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000729 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000730 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000731 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000732 if (pattern[1] > 0 && state->ptr != ptr)
733 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 pattern += pattern[0];
735 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000736
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000737 case SRE_OP_ASSERT_NOT:
738 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000739 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 TRACE(("%8d: assert not subpattern %d\n", PTR(ptr), pattern[1]));
741 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000742 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000743 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000744 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000745 if (i < 0)
746 return i;
747 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000748 return 0;
749 if (pattern[1] > 0 && state->ptr != ptr)
750 return SRE_ERROR_STATE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000751 pattern += pattern[0];
752 break;
753
754 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000755 /* alternation */
756 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 TRACE(("%8d: branch\n", PTR(ptr)));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000758 lastmark = state->lastmark;
759 while (pattern[0]) {
760 SRE_CODE* code = pattern+1;
761 TRACE(("%8d: try branch\n", PTR(ptr)));
762 switch (code[0]) {
763 case SRE_OP_IN:
764 if (ptr >= end || !SRE_MEMBER(code + 2, ptr[0]))
765 break;
766 code += code[1] + 1;
767 state->ptr = ptr + 1;
768 goto branch;
769 case SRE_OP_LITERAL:
770 if (ptr >= end || (SRE_CODE) ptr[0] != code[1])
771 break;
772 code += 2;
773 state->ptr = ptr + 1;
774 goto branch;
775 default:
776 state->ptr = ptr;
777 branch:
778 i = SRE_MATCH(state, code, level + 1);
779 if (i)
780 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000781 while (state->lastmark > lastmark)
782 state->mark[state->lastmark--] = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000783 }
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000784 pattern += pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000785 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000786 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000787
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000788 case SRE_OP_REPEAT_ONE:
789 /* match repeated sequence (maximizing regexp) */
790
791 /* this operator only works if the repeated item is
792 exactly one character wide, and we're not already
793 collecting backtracking points. for other cases,
794 use the MAX_REPEAT operator instead */
795
796 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
797
798 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
799 pattern[1], pattern[2]));
800
Fredrik Lundhe1869832000-08-01 22:47:49 +0000801 if (ptr + pattern[1] > end)
802 return 0; /* cannot match */
803
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000804 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000805
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000806 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
807 if (count < 0)
808 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000809
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000810 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000811
812 /* when we arrive here, count contains the number of
813 matches, and ptr points to the tail of the target
814 string. check if the rest of the pattern matches,
815 and backtrack if not. */
816
817 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
818
819 if (count < (int) pattern[1])
820 return 0;
821
822 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
823 /* tail is empty. we're finished */
824 TRACE(("%8d: tail is empty\n", PTR(ptr)));
825 state->ptr = ptr;
826 return 1;
827
828 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
829 /* tail starts with a literal. skip positions where
830 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000831 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000832 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
833 for (;;) {
834 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
835 while (count >= (int) pattern[1] &&
836 (ptr >= end || *ptr != chr)) {
837 ptr--;
838 count--;
839 }
840 TRACE(("%8d: check tail\n", PTR(ptr)));
841 if (count < (int) pattern[1])
842 break;
843 state->ptr = ptr;
844 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
845 if (i > 0) {
846 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
847 return 1;
848 }
849 ptr--;
850 count--;
851 }
852
853 } else {
854 /* general case */
855 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
856 while (count >= (int) pattern[1]) {
857 state->ptr = ptr;
858 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
859 if (i < 0)
860 return i;
861 if (i) {
862 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
863 return 1;
864 }
865 ptr--;
866 count--;
867 }
868 }
869 return 0;
870
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000871 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000872 /* create repeat context. all the hard work is done
873 by the UNTIL operator */
874 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
875 TRACE(("%8d: repeat {%d,%d}\n", PTR(ptr),
876 pattern[1], pattern[2]));
877
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000878 rep.count = -1;
879 rep.pattern = pattern;
880
881 /* install new repeat context */
882 rep.prev = state->repeat;
883 state->repeat = &rep;
884
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000885 state->ptr = ptr;
886 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000887
888 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000889
890 return i;
891
892 case SRE_OP_MAX_UNTIL:
893 /* maximizing repeat */
894 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
895
896 /* FIXME: we probably need to deal with zero-width
897 matches in here... */
898
899 rp = state->repeat;
900 if (!rp)
901 return SRE_ERROR_STATE;
902
903 state->ptr = ptr;
904
905 count = rp->count + 1;
906
907 TRACE(("%8d: max until %d\n", PTR(ptr), count));
908
909 if (count < rp->pattern[1]) {
910 /* not enough matches */
911 TRACE(("%8d: match item (required)\n", PTR(ptr)));
912 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000913 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000914 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000915 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000916 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000917 rp->count = count - 1;
918 state->ptr = ptr;
919 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000920 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000921
922 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
923 TRACE(("%8d: match item (optional)\n", PTR(ptr)));
924 /* we may have enough matches, but if we can
925 match another item, do so */
926 rp->count = count;
927 lastmark = state->lastmark;
928 mark_save(state, 0, lastmark);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000929 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000930 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000931 if (i)
932 return i;
933 mark_restore(state, 0, lastmark);
934 rp->count = count - 1;
935 state->ptr = ptr;
936 }
937
938 /* cannot match more repeated items here. make sure the
939 tail matches */
940 TRACE(("%8d: match tail\n", PTR(ptr)));
941 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000942 i = SRE_MATCH(state, pattern, level + 1);
943 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000944 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000945 state->repeat = rp;
946 return 0;
947
948 case SRE_OP_MIN_UNTIL:
949 /* minimizing repeat */
950 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
951
952 rp = state->repeat;
953 if (!rp)
954 return SRE_ERROR_STATE;
955
956 count = rp->count + 1;
957
958 TRACE(("%8d: min until %d\n", PTR(ptr), count));
959
960 state->ptr = ptr;
961
962 if (count < rp->pattern[1]) {
963 /* not enough matches */
964 TRACE(("%8d: match item (required)\n", PTR(ptr)));
965 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000966 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000967 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000968 if (i)
969 return i;
970 rp->count = count-1;
971 state->ptr = ptr;
972 return 0;
973 }
974
975 /* see if the tail matches */
976 TRACE(("%8d: match tail\n", PTR(ptr)));
977 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000978 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000979 if (i) {
980 /* free(rp); */
981 return i;
982 }
983 state->repeat = rp;
984
985 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
986 return 0;
987
988 TRACE(("%8d: match item (optional)\n", PTR(ptr)));
989 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000990 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000991 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000992 if (i)
993 return i;
994 rp->count = count - 1;
995 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000996
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000997 default:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000998 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
999 return SRE_ERROR_ILLEGAL;
1000 }
1001 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001002
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001003 /* shouldn't end up here */
1004 return SRE_ERROR_ILLEGAL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001005}
1006
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001007LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001008SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1009{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001010 SRE_CHAR* ptr = state->start;
1011 SRE_CHAR* end = state->end;
1012 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001013 int prefix_len = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001014 SRE_CODE* prefix = NULL;
1015 SRE_CODE* charset = NULL;
1016 SRE_CODE* overlap = NULL;
1017 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001018
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001019 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001020 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001021 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001022
1023 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001024
1025 if (pattern[3] > 0) {
1026 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001027 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001028 end -= pattern[3]-1;
1029 if (end <= ptr)
1030 end = ptr+1;
1031 }
1032
Fredrik Lundh3562f112000-07-02 12:00:07 +00001033 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001034 /* pattern starts with a known prefix */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001035 prefix_len = pattern[5];
1036 prefix = pattern + 6;
1037 overlap = prefix + prefix_len - 1;
1038 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001039 /* pattern starts with a character from a known set */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001040 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001041
1042 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001043 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001044
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001045#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001046 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001047 /* pattern starts with a known prefix. use the overlap
1048 table to skip forward as fast as we possibly can */
1049 int i = 0;
1050 end = state->end;
1051 while (ptr < end) {
1052 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001053 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001054 if (!i)
1055 break;
1056 else
1057 i = overlap[i];
1058 } else {
1059 if (++i == prefix_len) {
1060 /* found a potential match */
1061 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
1062 state->start = ptr - prefix_len + 1;
1063 state->ptr = ptr + 1;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001064 if (flags & SRE_INFO_LITERAL)
1065 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001066 status = SRE_MATCH(state, pattern + 2*prefix_len, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001067 if (status != 0)
1068 return status;
1069 /* close but no cigar -- try again */
1070 i = overlap[i];
1071 }
1072 break;
1073 }
1074
1075 }
1076 ptr++;
1077 }
1078 return 0;
1079 }
1080#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001081
Fredrik Lundh3562f112000-07-02 12:00:07 +00001082 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001083 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001084 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001085 SRE_CODE chr = pattern[1];
1086 for (;;) {
1087 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1088 ptr++;
1089 if (ptr == end)
1090 return 0;
1091 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
1092 state->start = ptr;
1093 state->ptr = ++ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001094 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001095 if (status != 0)
1096 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001097 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001098 } else if (charset) {
1099 /* pattern starts with a character from a known set */
1100 for (;;) {
1101 while (ptr < end && !SRE_MEMBER(charset, ptr[0]))
1102 ptr++;
1103 if (ptr == end)
1104 return 0;
1105 TRACE(("%8d: === SEARCH === charset\n", PTR(ptr)));
1106 state->start = ptr;
1107 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001108 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001109 if (status != 0)
1110 break;
1111 }
1112 } else
1113 /* general case */
1114 while (ptr <= end) {
1115 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
1116 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001117 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001118 if (status != 0)
1119 break;
1120 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001121
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001122 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001123}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001124
Guido van Rossumb700df92000-03-31 14:59:30 +00001125
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001126#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001127
1128/* -------------------------------------------------------------------- */
1129/* factories and destructors */
1130
1131/* see sre.h for object declarations */
1132
1133staticforward PyTypeObject Pattern_Type;
1134staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001135staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001136
1137static PyObject *
1138_compile(PyObject* self_, PyObject* args)
1139{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001140 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001141
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001142 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001143 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001144
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001145 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001146 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001147 PyObject* code;
1148 int groups = 0;
1149 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001150 PyObject* indexgroup = NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001151 if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
Fredrik Lundhc2301732000-07-02 22:25:39 +00001152 &groups, &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001153 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001154
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001155 code = PySequence_Fast(code, "code argument must be a sequence");
1156 if (!code)
1157 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001159#if PY_VERSION_HEX >= 0x01060000
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001160 n = PySequence_Size(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001161#else
1162 n = PySequence_Length(code);
1163#endif
Fredrik Lundh6f013982000-07-03 18:44:21 +00001164
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001165 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
1166 if (!self) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001167 Py_DECREF(code);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001168 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001169 }
1170
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001171 for (i = 0; i < n; i++) {
1172 PyObject *o = PySequence_Fast_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001173 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001174 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001175
1176 Py_DECREF(code);
1177
1178 if (PyErr_Occurred())
1179 return NULL;
1180
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001181 Py_INCREF(pattern);
1182 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001183
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001184 self->flags = flags;
1185
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001186 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001187
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001188 Py_XINCREF(groupindex);
1189 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001190
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001191 Py_XINCREF(indexgroup);
1192 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001193
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001194 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001195}
1196
1197static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001198sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001199{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001200 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001201}
1202
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001203static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001204sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001205{
1206 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001207 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001208 return NULL;
1209 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001210 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001211#if defined(HAVE_UNICODE)
1212 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001213 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001214#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001215 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001216}
1217
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001218LOCAL(void)
1219state_reset(SRE_STATE* state)
1220{
1221 int i;
1222
1223 state->lastmark = 0;
1224
1225 /* FIXME: dynamic! */
1226 for (i = 0; i < SRE_MARK_SIZE; i++)
1227 state->mark[i] = NULL;
1228
1229 state->lastindex = -1;
1230
1231 state->repeat = NULL;
1232
1233 mark_fini(state);
1234}
1235
Guido van Rossumb700df92000-03-31 14:59:30 +00001236LOCAL(PyObject*)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001237state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1238 int start, int end)
Guido van Rossumb700df92000-03-31 14:59:30 +00001239{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001240 /* prepare state object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001241
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 PyBufferProcs *buffer;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001243 int size, bytes;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001244 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001245
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001246 memset(state, 0, sizeof(SRE_STATE));
1247
1248 state->lastindex = -1;
1249
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001250 /* get pointer to string buffer */
1251 buffer = string->ob_type->tp_as_buffer;
1252 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1253 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001254 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001255 return NULL;
1256 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001257
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001258 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001259 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1260 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001261 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1262 return NULL;
1263 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001264
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001265 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001266
1267#if PY_VERSION_HEX >= 0x01060000
1268 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001269#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001270 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001271#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001272
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001273 if (PyString_Check(string) || bytes == size)
1274 state->charsize = 1;
1275#if defined(HAVE_UNICODE)
1276 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
1277 state->charsize = sizeof(Py_UNICODE);
1278#endif
1279 else {
1280 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1281 return NULL;
1282 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001283
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001284 /* adjust boundaries */
1285 if (start < 0)
1286 start = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001287 else if (start > size)
1288 start = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001289
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001290 if (end < 0)
1291 end = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001292 else if (end > size)
1293 end = size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001294
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001295 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001296
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001297 state->start = (void*) ((char*) ptr + start * state->charsize);
1298 state->end = (void*) ((char*) ptr + end * state->charsize);
1299
1300 Py_INCREF(string);
1301 state->string = string;
1302 state->pos = start;
1303 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001304
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001305 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001306 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001307#if defined(HAVE_UNICODE)
1308 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001309 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001310#endif
1311 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001312 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001313
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001314 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001315}
1316
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001317LOCAL(void)
1318state_fini(SRE_STATE* state)
1319{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001320 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001321 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001322}
1323
1324LOCAL(PyObject*)
1325state_getslice(SRE_STATE* state, int index, PyObject* string)
1326{
1327 index = (index - 1) * 2;
1328
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001329 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1330 Py_INCREF(Py_None);
1331 return Py_None;
1332 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001333
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001334 return PySequence_GetSlice(
1335 string,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001336 ((char*)state->mark[index] - (char*)state->beginning) /
1337 state->charsize,
1338 ((char*)state->mark[index+1] - (char*)state->beginning) /
1339 state->charsize
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001340 );
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001341}
1342
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001343static void
1344pattern_error(int status)
1345{
1346 switch (status) {
1347 case SRE_ERROR_RECURSION_LIMIT:
1348 PyErr_SetString(
1349 PyExc_RuntimeError,
1350 "maximum recursion limit exceeded"
1351 );
1352 break;
1353 case SRE_ERROR_MEMORY:
1354 PyErr_NoMemory();
1355 break;
1356 default:
1357 /* other error codes indicate compiler/engine bugs */
1358 PyErr_SetString(
1359 PyExc_RuntimeError,
1360 "internal error in regular expression engine"
1361 );
1362 }
1363}
1364
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001365static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001366pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001367{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001368 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001369
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001370 MatchObject* match;
1371 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001372 char* base;
1373 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001374
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001375 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001376
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001377 /* create match object (with room for extra group marks) */
1378 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001379 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001380 if (!match)
1381 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001382
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001383 Py_INCREF(pattern);
1384 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001385
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001386 Py_INCREF(state->string);
1387 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001389 match->regs = NULL;
1390 match->groups = pattern->groups+1;
1391
1392 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001393
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001394 base = (char*) state->beginning;
1395 n = state->charsize;
1396
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001397 match->mark[0] = ((char*) state->start - base) / n;
1398 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001399
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001400 for (i = j = 0; i < pattern->groups; i++, j+=2)
1401 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1402 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1403 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1404 } else
1405 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1406
1407 match->pos = state->pos;
1408 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001409
Fredrik Lundh6f013982000-07-03 18:44:21 +00001410 match->lastindex = state->lastindex;
1411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001412 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001413
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001414 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001415
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001416 /* no match */
1417 Py_INCREF(Py_None);
1418 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001420 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001421
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001422 /* internal error */
1423 pattern_error(status);
1424 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001425}
1426
1427static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001428pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001429{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001430 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001432 ScannerObject* self;
1433
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001434 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001435 int start = 0;
1436 int end = INT_MAX;
1437 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1438 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001439
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001440 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001441 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001442 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001443 return NULL;
1444
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001445 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001446 if (!string) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001447 PyObject_Del(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001448 return NULL;
1449 }
1450
1451 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001452 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001453
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001454 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001455}
1456
Guido van Rossumb700df92000-03-31 14:59:30 +00001457static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001458pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001459{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001460 Py_XDECREF(self->pattern);
1461 Py_XDECREF(self->groupindex);
1462 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001463}
1464
1465static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001466pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001467{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001468 SRE_STATE state;
1469 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001470
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001471 PyObject* string;
1472 int start = 0;
1473 int end = INT_MAX;
1474 if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
1475 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001476
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001477 string = state_init(&state, self, string, start, end);
1478 if (!string)
1479 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001480
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001481 state.ptr = state.start;
1482
1483 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001484 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001485 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001486#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001487 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001488#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001489 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001490
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001491 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001492
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001493 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001494}
1495
1496static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001497pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001498{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001499 SRE_STATE state;
1500 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001501
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001502 PyObject* string;
1503 int start = 0;
1504 int end = INT_MAX;
1505 if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
1506 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001507
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001508 string = state_init(&state, self, string, start, end);
1509 if (!string)
1510 return NULL;
1511
1512 if (state.charsize == 1) {
1513 status = sre_search(&state, PatternObject_GetCode(self));
1514 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001515#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001516 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001517#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001518 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001519
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001520 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001521
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001522 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001523}
1524
1525static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001526call(char* function, PyObject* args)
1527{
1528 PyObject* name;
1529 PyObject* module;
1530 PyObject* func;
1531 PyObject* result;
1532
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001533 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001534 if (!name)
1535 return NULL;
1536 module = PyImport_Import(name);
1537 Py_DECREF(name);
1538 if (!module)
1539 return NULL;
1540 func = PyObject_GetAttrString(module, function);
1541 Py_DECREF(module);
1542 if (!func)
1543 return NULL;
1544 result = PyObject_CallObject(func, args);
1545 Py_DECREF(func);
1546 Py_DECREF(args);
1547 return result;
1548}
1549
1550static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001551pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001552{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001553 PyObject* template;
1554 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001555 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001556 if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
1557 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001558
1559 /* delegate to Python code */
1560 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1561}
1562
1563static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001564pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001565{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001566 PyObject* template;
1567 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001568 PyObject* count = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001569 if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
1570 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001571
1572 /* delegate to Python code */
1573 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1574}
1575
1576static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001577pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001578{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001579 PyObject* string;
Fredrik Lundh28552902000-07-05 21:14:16 +00001580 PyObject* maxsplit = Py_False; /* zero */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001581 if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
1582 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001583
1584 /* delegate to Python code */
1585 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1586}
1587
1588static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001589pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001590{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001591 SRE_STATE state;
1592 PyObject* list;
1593 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001594 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001595
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001596 PyObject* string;
1597 int start = 0;
1598 int end = INT_MAX;
1599 if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
1600 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001602 string = state_init(&state, self, string, start, end);
1603 if (!string)
1604 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001606 list = PyList_New(0);
Guido van Rossumb700df92000-03-31 14:59:30 +00001607
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001608 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001609
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001610 PyObject* item;
1611
1612 state.ptr = state.start;
1613
1614 if (state.charsize == 1) {
1615 status = sre_search(&state, PatternObject_GetCode(self));
1616 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001617#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001618 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001619#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001620 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001621
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001622 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001623
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001624 /* don't bother to build a match object */
1625 switch (self->groups) {
1626 case 0:
1627 item = PySequence_GetSlice(
1628 string,
1629 ((char*) state.start - (char*) state.beginning) /
1630 state.charsize,
1631 ((char*) state.ptr - (char*) state.beginning) /
1632 state.charsize);
1633 if (!item)
1634 goto error;
1635 break;
1636 case 1:
1637 item = state_getslice(&state, 1, string);
1638 if (!item)
1639 goto error;
1640 break;
1641 default:
1642 item = PyTuple_New(self->groups);
1643 if (!item)
1644 goto error;
1645 for (i = 0; i < self->groups; i++) {
1646 PyObject* o = state_getslice(&state, i+1, string);
1647 if (!o) {
1648 Py_DECREF(item);
1649 goto error;
1650 }
1651 PyTuple_SET_ITEM(item, i, o);
1652 }
1653 break;
1654 }
1655
1656 if (PyList_Append(list, item) < 0) {
1657 Py_DECREF(item);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001658 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001659 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001660
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001661 if (state.ptr == state.start)
1662 state.start = (void*) ((char*) state.ptr + state.charsize);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001663 else
1664 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001665
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001666 } else {
Guido van Rossumb700df92000-03-31 14:59:30 +00001667
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001668 if (status == 0)
1669 break;
1670
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001671 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001672 goto error;
Guido van Rossumb700df92000-03-31 14:59:30 +00001673
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001674 }
1675 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001676
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001677 state_fini(&state);
1678 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001679
1680error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001681 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001682 state_fini(&state);
1683 return NULL;
1684
Guido van Rossumb700df92000-03-31 14:59:30 +00001685}
1686
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001687static PyMethodDef pattern_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001688 {"match", (PyCFunction) pattern_match, 1},
1689 {"search", (PyCFunction) pattern_search, 1},
1690 {"sub", (PyCFunction) pattern_sub, 1},
1691 {"subn", (PyCFunction) pattern_subn, 1},
1692 {"split", (PyCFunction) pattern_split, 1},
1693 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001694 /* experimental */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001695 {"scanner", (PyCFunction) pattern_scanner, 1},
1696 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001697};
1698
1699static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001700pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001701{
1702 PyObject* res;
1703
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001704 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001705
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001706 if (res)
1707 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00001708
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001709 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00001710
1711 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001712 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001713 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001714 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001715 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001716
1717 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001718 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001719
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001720 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001721 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001722
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001723 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001724 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001725 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001726 }
1727
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001728 PyErr_SetString(PyExc_AttributeError, name);
1729 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001730}
1731
1732statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001733 PyObject_HEAD_INIT(NULL)
1734 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001735 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001736 (destructor)pattern_dealloc, /*tp_dealloc*/
1737 0, /*tp_print*/
1738 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001739};
1740
1741/* -------------------------------------------------------------------- */
1742/* match methods */
1743
1744static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001745match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001746{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001747 Py_XDECREF(self->regs);
1748 Py_XDECREF(self->string);
1749 Py_DECREF(self->pattern);
1750 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001751}
1752
1753static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001754match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001755{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001756 if (index < 0 || index >= self->groups) {
1757 /* raise IndexError if we were given a bad group number */
1758 PyErr_SetString(
1759 PyExc_IndexError,
1760 "no such group"
1761 );
1762 return NULL;
1763 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001764
Fredrik Lundh6f013982000-07-03 18:44:21 +00001765 index *= 2;
1766
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001767 if (self->string == Py_None || self->mark[index] < 0) {
1768 /* return default value if the string or group is undefined */
1769 Py_INCREF(def);
1770 return def;
1771 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001772
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001773 return PySequence_GetSlice(
1774 self->string, self->mark[index], self->mark[index+1]
1775 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001776}
1777
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001778static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001779match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001780{
Fredrik Lundh6f013982000-07-03 18:44:21 +00001781 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001782
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001783 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001784 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001785
Fredrik Lundh6f013982000-07-03 18:44:21 +00001786 i = -1;
1787
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001788 if (self->pattern->groupindex) {
1789 index = PyObject_GetItem(self->pattern->groupindex, index);
1790 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00001791 if (PyInt_Check(index))
1792 i = (int) PyInt_AS_LONG(index);
1793 Py_DECREF(index);
1794 } else
1795 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001796 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001797
1798 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001799}
1800
1801static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001802match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001803{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001804 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001805}
1806
1807static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001808match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001809{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001810 PyObject* result;
1811 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001812
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001813 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001814
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001815 switch (size) {
1816 case 0:
1817 result = match_getslice(self, Py_False, Py_None);
1818 break;
1819 case 1:
1820 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
1821 break;
1822 default:
1823 /* fetch multiple items */
1824 result = PyTuple_New(size);
1825 if (!result)
1826 return NULL;
1827 for (i = 0; i < size; i++) {
1828 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001829 self, PyTuple_GET_ITEM(args, i), Py_None
1830 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001831 if (!item) {
1832 Py_DECREF(result);
1833 return NULL;
1834 }
1835 PyTuple_SET_ITEM(result, i, item);
1836 }
1837 break;
1838 }
1839 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001840}
1841
1842static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001843match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001844{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001845 PyObject* result;
1846 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001847
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001848 PyObject* def = Py_None;
1849 if (!PyArg_ParseTuple(args, "|O:groups", &def))
1850 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001851
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001852 result = PyTuple_New(self->groups-1);
1853 if (!result)
1854 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001855
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001856 for (index = 1; index < self->groups; index++) {
1857 PyObject* item;
1858 item = match_getslice_by_index(self, index, def);
1859 if (!item) {
1860 Py_DECREF(result);
1861 return NULL;
1862 }
1863 PyTuple_SET_ITEM(result, index-1, item);
1864 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001865
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001866 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001867}
1868
1869static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001870match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001871{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 PyObject* result;
1873 PyObject* keys;
1874 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00001875
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001876 PyObject* def = Py_None;
1877 if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
1878 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001879
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001880 result = PyDict_New();
1881 if (!result || !self->pattern->groupindex)
1882 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001883
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001884 keys = PyMapping_Keys(self->pattern->groupindex);
1885 if (!keys) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001886 Py_DECREF(result);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001887 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001888 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001889
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001890 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
1891 PyObject* key;
1892 PyObject* item;
1893 key = PyList_GET_ITEM(keys, index);
1894 if (!key) {
1895 Py_DECREF(keys);
1896 Py_DECREF(result);
1897 return NULL;
1898 }
1899 item = match_getslice(self, key, def);
1900 if (!item) {
1901 Py_DECREF(key);
1902 Py_DECREF(keys);
1903 Py_DECREF(result);
1904 return NULL;
1905 }
1906 /* FIXME: <fl> this can fail, right? */
1907 PyDict_SetItem(result, key, item);
1908 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001909
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001910 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00001911
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001912 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001913}
1914
1915static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001916match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001917{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001918 int index;
1919
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001920 PyObject* index_ = Py_False; /* zero */
1921 if (!PyArg_ParseTuple(args, "|O:start", &index_))
1922 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001923
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001924 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001925
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001926 if (index < 0 || index >= self->groups) {
1927 PyErr_SetString(
1928 PyExc_IndexError,
1929 "no such group"
1930 );
1931 return NULL;
1932 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 if (self->mark[index*2] < 0) {
1935 Py_INCREF(Py_None);
1936 return Py_None;
1937 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001938
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001939 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00001940}
1941
1942static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001943match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001944{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001945 int index;
1946
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001947 PyObject* index_ = Py_False; /* zero */
1948 if (!PyArg_ParseTuple(args, "|O:end", &index_))
1949 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001950
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001951 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001952
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001953 if (index < 0 || index >= self->groups) {
1954 PyErr_SetString(
1955 PyExc_IndexError,
1956 "no such group"
1957 );
1958 return NULL;
1959 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001960
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001961 if (self->mark[index*2] < 0) {
1962 Py_INCREF(Py_None);
1963 return Py_None;
1964 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001965
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001966 return Py_BuildValue("i", self->mark[index*2+1]);
1967}
1968
1969LOCAL(PyObject*)
1970_pair(int i1, int i2)
1971{
1972 PyObject* pair;
1973 PyObject* item;
1974
1975 pair = PyTuple_New(2);
1976 if (!pair)
1977 return NULL;
1978
1979 item = PyInt_FromLong(i1);
1980 if (!item)
1981 goto error;
1982 PyTuple_SET_ITEM(pair, 0, item);
1983
1984 item = PyInt_FromLong(i2);
1985 if (!item)
1986 goto error;
1987 PyTuple_SET_ITEM(pair, 1, item);
1988
1989 return pair;
1990
1991 error:
1992 Py_DECREF(pair);
1993 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001994}
1995
1996static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001997match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001998{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001999 int index;
2000
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002001 PyObject* index_ = Py_False; /* zero */
2002 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2003 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002004
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002005 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002006
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002007 if (index < 0 || index >= self->groups) {
2008 PyErr_SetString(
2009 PyExc_IndexError,
2010 "no such group"
2011 );
2012 return NULL;
2013 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002014
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002015 if (self->mark[index*2] < 0) {
2016 Py_INCREF(Py_None);
2017 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002018 return Py_BuildValue("OO", Py_None, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002019 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002020
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002021 return _pair(self->mark[index*2], self->mark[index*2+1]);
2022}
2023
2024static PyObject*
2025match_regs(MatchObject* self)
2026{
2027 PyObject* regs;
2028 PyObject* item;
2029 int index;
2030
2031 regs = PyTuple_New(self->groups);
2032 if (!regs)
2033 return NULL;
2034
2035 for (index = 0; index < self->groups; index++) {
2036 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2037 if (!item) {
2038 Py_DECREF(regs);
2039 return NULL;
2040 }
2041 PyTuple_SET_ITEM(regs, index, item);
2042 }
2043
2044 Py_INCREF(regs);
2045 self->regs = regs;
2046
2047 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002048}
2049
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002050static PyMethodDef match_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002051 {"group", (PyCFunction) match_group, 1},
2052 {"start", (PyCFunction) match_start, 1},
2053 {"end", (PyCFunction) match_end, 1},
2054 {"span", (PyCFunction) match_span, 1},
2055 {"groups", (PyCFunction) match_groups, 1},
2056 {"groupdict", (PyCFunction) match_groupdict, 1},
2057 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002058};
2059
2060static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002061match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002062{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002063 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002064
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002065 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2066 if (res)
2067 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002068
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002069 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002070
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002071 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002072 if (self->lastindex >= 0)
2073 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002074 Py_INCREF(Py_None);
2075 return Py_None;
2076 }
2077
2078 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002079 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002080 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002081 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002082 );
2083 if (result)
2084 return result;
2085 PyErr_Clear();
2086 }
2087 Py_INCREF(Py_None);
2088 return Py_None;
2089 }
2090
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002091 if (!strcmp(name, "string")) {
2092 if (self->string) {
2093 Py_INCREF(self->string);
2094 return self->string;
2095 } else {
2096 Py_INCREF(Py_None);
2097 return Py_None;
2098 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002099 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002100
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002101 if (!strcmp(name, "regs")) {
2102 if (self->regs) {
2103 Py_INCREF(self->regs);
2104 return self->regs;
2105 } else
2106 return match_regs(self);
2107 }
2108
2109 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002110 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002111 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002112 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002113
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002114 if (!strcmp(name, "pos"))
2115 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002116
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002117 if (!strcmp(name, "endpos"))
2118 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002119
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002120 PyErr_SetString(PyExc_AttributeError, name);
2121 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002122}
2123
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2126
2127statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002128 PyObject_HEAD_INIT(NULL)
2129 0, "SRE_Match",
2130 sizeof(MatchObject), sizeof(int),
2131 (destructor)match_dealloc, /*tp_dealloc*/
2132 0, /*tp_print*/
2133 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002134};
2135
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002136/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002137/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002138
2139static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002140scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002141{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002142 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002143 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002144 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002145}
2146
2147static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002148scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002149{
2150 SRE_STATE* state = &self->state;
2151 PyObject* match;
2152 int status;
2153
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002154 state_reset(state);
2155
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002156 state->ptr = state->start;
2157
2158 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002159 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002160 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002161#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002162 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002163#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002164 }
2165
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002166 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002167 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002168
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002169 if (status == 0 || state->ptr == state->start)
2170 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002171 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002172 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002173
2174 return match;
2175}
2176
2177
2178static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002179scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002180{
2181 SRE_STATE* state = &self->state;
2182 PyObject* match;
2183 int status;
2184
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002185 state_reset(state);
2186
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002187 state->ptr = state->start;
2188
2189 if (state->charsize == 1) {
2190 status = sre_search(state, PatternObject_GetCode(self->pattern));
2191 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002192#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002193 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002194#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002195 }
2196
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002197 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002198 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002199
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002200 if (status == 0 || state->ptr == state->start)
2201 state->start = (void*) ((char*) state->ptr + state->charsize);
2202 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002203 state->start = state->ptr;
2204
2205 return match;
2206}
2207
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002208static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002209 {"match", (PyCFunction) scanner_match, 0},
2210 {"search", (PyCFunction) scanner_search, 0},
2211 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002212};
2213
2214static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002215scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002216{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002217 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002218
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002219 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2220 if (res)
2221 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002222
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002223 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002224
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002225 /* attributes */
2226 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002227 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002228 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002229 }
2230
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002231 PyErr_SetString(PyExc_AttributeError, name);
2232 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002233}
2234
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002235statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002236 PyObject_HEAD_INIT(NULL)
2237 0, "SRE_Scanner",
2238 sizeof(ScannerObject), 0,
2239 (destructor)scanner_dealloc, /*tp_dealloc*/
2240 0, /*tp_print*/
2241 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002242};
2243
Guido van Rossumb700df92000-03-31 14:59:30 +00002244static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002245 {"compile", _compile, 1},
2246 {"getcodesize", sre_codesize, 1},
2247 {"getlower", sre_getlower, 1},
2248 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002249};
2250
2251void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002252#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00002253__declspec(dllexport)
2254#endif
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002255init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002256{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002257 /* Patch object types */
2258 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002259 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002260
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002261 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002262}
2263
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002264#endif /* !defined(SRE_RECURSIVE) */