blob: 3d6305a6ae222e1bd3cde5b003b0f6166727ceee [file] [log] [blame]
Guido van Rossumb700df92000-03-31 14:59:30 +00001/* -*- Mode: C; tab-width: 4 -*-
2 *
3 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00004 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00005 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00006 *
7 * partial history:
Fredrik Lundh436c3d52000-06-29 08:58:44 +00008 * 99-10-24 fl created (based on existing template matcher code)
Guido van Rossumb700df92000-03-31 14:59:30 +00009 * 99-11-13 fl added categories, branching, and more (0.2)
10 * 99-11-16 fl some tweaks to compile on non-Windows platforms
11 * 99-12-18 fl non-literals, generic maximizing repeat (0.3)
Fredrik Lundh436c3d52000-06-29 08:58:44 +000012 * 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
13 * 00-03-06 fl first alpha, sort of (0.5)
14 * 00-03-14 fl removed most compatibility stuff (0.6)
15 * 00-05-10 fl towards third alpha (0.8.2)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +000016 * 00-05-13 fl added experimental scanner stuff (0.8.3)
Fredrik Lundh436c3d52000-06-29 08:58:44 +000017 * 00-05-27 fl final bug hunt (0.8.4)
18 * 00-06-21 fl less bugs, more taste (0.8.5)
19 * 00-06-25 fl major changes to better deal with nested repeats (0.9)
20 * 00-06-28 fl fixed findall (0.9.1)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +000021 * 00-06-29 fl fixed split, added more scanner features (0.9.2)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000022 * 00-06-30 fl tuning, fast search (0.9.3)
Fredrik Lundh0640e112000-06-30 13:55:15 +000023 * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
Guido van Rossumb700df92000-03-31 14:59:30 +000024 *
25 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
26 *
Guido van Rossumb700df92000-03-31 14:59:30 +000027 * Portions of this engine have been developed in cooperation with
Fredrik Lundh22d25462000-07-01 17:50:59 +000028 * CNRI. Hewlett-Packard provided funding for 2.0 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000029 * other compatibility work.
30 */
31
32#ifndef SRE_RECURSIVE
33
Fredrik Lundh43b3b492000-06-30 10:41:31 +000034char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000035
36#include "Python.h"
37
38#include "sre.h"
39
Guido van Rossumb700df92000-03-31 14:59:30 +000040#if defined(HAVE_LIMITS_H)
41#include <limits.h>
42#else
43#define INT_MAX 2147483647
44#endif
45
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000046#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000047
Fredrik Lundh436c3d52000-06-29 08:58:44 +000048/* name of this module, minus the leading underscore */
49#define MODULE "sre"
50
Guido van Rossumb700df92000-03-31 14:59:30 +000051/* defining this one enables tracing */
52#undef DEBUG
53
Fredrik Lundh436c3d52000-06-29 08:58:44 +000054#if PY_VERSION_HEX >= 0x01060000
Fredrik Lundh22d25462000-07-01 17:50:59 +000055/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +000056#define HAVE_UNICODE
57#endif
58
Fredrik Lundh29c08be2000-06-29 23:33:12 +000059/* optional features */
60#define USE_FAST_SEARCH
61
Fredrik Lundh80946112000-06-29 18:03:25 +000062#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000063#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
64/* fastest possible local call under MSVC */
65#define LOCAL(type) static __inline type __fastcall
66#else
Fredrik Lundh29c08be2000-06-29 23:33:12 +000067#define LOCAL(type) static inline type
Guido van Rossumb700df92000-03-31 14:59:30 +000068#endif
69
70/* error codes */
71#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
72#define SRE_ERROR_MEMORY -9 /* out of memory */
73
Fredrik Lundh436c3d52000-06-29 08:58:44 +000074#if defined(DEBUG)
Guido van Rossumb700df92000-03-31 14:59:30 +000075#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000076#else
77#define TRACE(v)
78#endif
79
Fredrik Lundh436c3d52000-06-29 08:58:44 +000080#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Guido van Rossumb700df92000-03-31 14:59:30 +000081
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000082/* -------------------------------------------------------------------- */
83/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000084
Fredrik Lundh436c3d52000-06-29 08:58:44 +000085/* default character predicates (run sre_chars.py to regenerate tables) */
86
87#define SRE_DIGIT_MASK 1
88#define SRE_SPACE_MASK 2
89#define SRE_LINEBREAK_MASK 4
90#define SRE_ALNUM_MASK 8
91#define SRE_WORD_MASK 16
92
/* per-character flag bytes for the 7-bit range; each entry is an OR of
   the SRE_*_MASK values:
     2 = SPACE, 6 = SPACE|LINEBREAK, 16 = WORD,
     24 = ALNUM|WORD, 25 = DIGIT|ALNUM|WORD */
static char sre_char_info[128] = {
    /*   0- 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16- 31 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32- 47 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48- 63 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64- 79 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80- 95 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96-111 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112-127 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
100
/* lowercase mapping for the 7-bit range: 'A'..'Z' (65..90) map to
   'a'..'z' (97..122), every other value maps to itself */
static char sre_char_lower[128] = {
    /*   0- 15 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16- 31 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32- 47 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48- 63 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64- 79 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    /*  80- 95 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95,
    /*  96-111 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    /* 112-127 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127
};
110
Fredrik Lundhb389df32000-06-29 12:48:37 +0000111static unsigned int sre_lower(unsigned int ch)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000112{
Fredrik Lundhb389df32000-06-29 12:48:37 +0000113 return ((ch) < 128 ? sre_char_lower[ch] : ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000114}
115
/* test one flag bit of sre_char_info; characters outside the 7-bit
   range never have any flag set */
#define SRE_CHAR_INFO_TEST(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

#define SRE_IS_DIGIT(ch) SRE_CHAR_INFO_TEST((ch), SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch) SRE_CHAR_INFO_TEST((ch), SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_INFO_TEST((ch), SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch) SRE_CHAR_INFO_TEST((ch), SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch) SRE_CHAR_INFO_TEST((ch), SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000126
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000127/* locale-specific character predicates */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000128
/* locale-aware lowercase: values below 256 go through tolower(),
   larger values are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return tolower(ch);
}

/* the <ctype.h> predicates are only consulted for values below 256;
   anything larger is reported as not belonging to the class */
#define SRE_LOC_IS_DIGIT(ch)\
    ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch)\
    ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch)\
    ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch)\
    (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
138
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000139/* unicode-specific character predicates */
140
#if defined(HAVE_UNICODE)
/* unicode lowercase: delegates to Py_UNICODE_TOLOWER after narrowing
   the value to Py_UNICODE */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#if !defined(Py_UNICODE_ISALNUM)
/* FIXME: workaround. should be fixed in unicodectype.c */
#define Py_UNICODE_ISALNUM(ch)\
    (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISUPPER(ch) ||\
     Py_UNICODE_ISTITLE(ch) || Py_UNICODE_ISDIGIT(ch))
#endif

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
/* a unicode word character is a unicode alphanumeric or an underscore;
   this must use the unicode alnum test, not the 7-bit SRE_IS_ALNUM,
   or every non-ascii letter and digit is treated as a non-word
   character */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
160
Guido van Rossumb700df92000-03-31 14:59:30 +0000161LOCAL(int)
162sre_category(SRE_CODE category, unsigned int ch)
163{
164 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000165
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000166 case SRE_CATEGORY_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000167 return SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000168 case SRE_CATEGORY_NOT_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000169 return !SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000170 case SRE_CATEGORY_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000171 return SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000172 case SRE_CATEGORY_NOT_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000173 return !SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000174 case SRE_CATEGORY_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000175 return SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000176 case SRE_CATEGORY_NOT_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000177 return !SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000178 case SRE_CATEGORY_LINEBREAK:
179 return SRE_IS_LINEBREAK(ch);
180 case SRE_CATEGORY_NOT_LINEBREAK:
181 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000182
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000183 case SRE_CATEGORY_LOC_WORD:
184 return SRE_LOC_IS_WORD(ch);
185 case SRE_CATEGORY_LOC_NOT_WORD:
186 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000187
188#if defined(HAVE_UNICODE)
189 case SRE_CATEGORY_UNI_DIGIT:
190 return SRE_UNI_IS_DIGIT(ch);
191 case SRE_CATEGORY_UNI_NOT_DIGIT:
192 return !SRE_UNI_IS_DIGIT(ch);
193 case SRE_CATEGORY_UNI_SPACE:
194 return SRE_UNI_IS_SPACE(ch);
195 case SRE_CATEGORY_UNI_NOT_SPACE:
196 return !SRE_UNI_IS_SPACE(ch);
197 case SRE_CATEGORY_UNI_WORD:
198 return SRE_UNI_IS_WORD(ch);
199 case SRE_CATEGORY_UNI_NOT_WORD:
200 return !SRE_UNI_IS_WORD(ch);
201 case SRE_CATEGORY_UNI_LINEBREAK:
202 return SRE_UNI_IS_LINEBREAK(ch);
203 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
204 return !SRE_UNI_IS_LINEBREAK(ch);
205#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000206 }
207 return 0;
208}
209
210/* helpers */
211
212LOCAL(int)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000213stack_free(SRE_STATE* state)
Guido van Rossumb700df92000-03-31 14:59:30 +0000214{
215 if (state->stack) {
216 TRACE(("release stack\n"));
217 free(state->stack);
218 state->stack = NULL;
219 }
220 state->stacksize = 0;
221 return 0;
222}
223
224static int /* shouldn't be LOCAL */
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000225stack_extend(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000226{
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000227 SRE_STACK* stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000228 int stacksize;
229
230 /* grow the stack to a suitable size; we need at least lo entries,
231 at most hi entries. if for some reason hi is lower than lo, lo
232 wins */
233
234 stacksize = state->stacksize;
235
236 if (stacksize == 0) {
237 /* create new stack */
238 stacksize = 512;
239 if (stacksize < lo)
240 stacksize = lo;
241 else if (stacksize > hi)
242 stacksize = hi;
243 TRACE(("allocate stack %d\n", stacksize));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000244 stack = malloc(sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000245 } else {
246 /* grow the stack (typically by a factor of two) */
247 while (stacksize < lo)
248 stacksize = 2 * stacksize;
249 /* FIXME: <fl> could trim size if it's larger than lo, and
250 much larger than hi */
251 TRACE(("grow stack to %d\n", stacksize));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000252 stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000253 }
254
255 if (!stack) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000256 stack_free(state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000257 return SRE_ERROR_MEMORY;
258 }
259
260 state->stack = stack;
261 state->stacksize = stacksize;
262
263 return 0;
264}
265
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000266/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000267
268#define SRE_CHAR unsigned char
269#define SRE_AT sre_at
270#define SRE_MEMBER sre_member
271#define SRE_MATCH sre_match
272#define SRE_SEARCH sre_search
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000273
274#if defined(HAVE_UNICODE)
275
Guido van Rossumb700df92000-03-31 14:59:30 +0000276#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000277#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000278#undef SRE_RECURSIVE
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000279
Guido van Rossumb700df92000-03-31 14:59:30 +0000280#undef SRE_SEARCH
281#undef SRE_MATCH
282#undef SRE_MEMBER
283#undef SRE_AT
284#undef SRE_CHAR
285
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000286/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000287
288#define SRE_CHAR Py_UNICODE
289#define SRE_AT sre_uat
290#define SRE_MEMBER sre_umember
291#define SRE_MATCH sre_umatch
292#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000293#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000294
295#endif /* SRE_RECURSIVE */
296
297/* -------------------------------------------------------------------- */
298/* String matching engine */
299
300/* the following section is compiled twice, with different character
301 settings */
302
303LOCAL(int)
304SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
305{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000306 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000307
308 int this, that;
309
310 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000311
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000312 case SRE_AT_BEGINNING:
Guido van Rossum29530882000-04-10 17:06:55 +0000313 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000314
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000315 case SRE_AT_BEGINNING_LINE:
316 return ((void*) ptr == state->beginning ||
317 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000318
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000319 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000320 return (((void*) (ptr+1) == state->end &&
321 SRE_IS_LINEBREAK((int) ptr[0])) ||
322 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000323
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000324 case SRE_AT_END_LINE:
325 return ((void*) ptr == state->end ||
326 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000327
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000328 case SRE_AT_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000329 if (state->beginning == state->end)
330 return 0;
331 that = ((void*) ptr > state->beginning) ?
332 SRE_IS_WORD((int) ptr[-1]) : 0;
333 this = ((void*) ptr < state->end) ?
334 SRE_IS_WORD((int) ptr[0]) : 0;
335 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000336
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000337 case SRE_AT_NON_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000338 if (state->beginning == state->end)
339 return 0;
340 that = ((void*) ptr > state->beginning) ?
341 SRE_IS_WORD((int) ptr[-1]) : 0;
342 this = ((void*) ptr < state->end) ?
343 SRE_IS_WORD((int) ptr[0]) : 0;
344 return this == that;
345 }
346
347 return 0;
348}
349
350LOCAL(int)
Fredrik Lundh0640e112000-06-30 13:55:15 +0000351SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000352{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000353 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000354
355 int ok = 1;
356
357 for (;;) {
358 switch (*set++) {
359
360 case SRE_OP_NEGATE:
361 ok = !ok;
362 break;
363
364 case SRE_OP_FAILURE:
365 return !ok;
366
367 case SRE_OP_LITERAL:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000368 if (ch == set[0])
Guido van Rossumb700df92000-03-31 14:59:30 +0000369 return ok;
370 set++;
371 break;
372
373 case SRE_OP_RANGE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000374 if (set[0] <= ch && ch <= set[1])
Guido van Rossumb700df92000-03-31 14:59:30 +0000375 return ok;
376 set += 2;
377 break;
378
379 case SRE_OP_CATEGORY:
380 if (sre_category(set[0], (int) ch))
381 return ok;
382 set += 1;
383 break;
384
385 default:
Fredrik Lundh80946112000-06-29 18:03:25 +0000386 /* internal error -- there's not much we can do about it
387 here, so let's just pretend it didn't match... */
Guido van Rossumb700df92000-03-31 14:59:30 +0000388 return 0;
389 }
390 }
391}
392
393LOCAL(int)
394SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
395{
396 /* check if string matches the given pattern. returns -1 for
397 error, 0 for failure, and 1 for success */
398
399 SRE_CHAR* end = state->end;
400 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000401 int stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000402 int stackbase;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000403 int lastmark;
Guido van Rossumb700df92000-03-31 14:59:30 +0000404 int i, count;
405
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000406 /* FIXME: this is a hack! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000407 void* mark_copy[SRE_MARK_SIZE];
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000408 void* mark = NULL;
409
410 TRACE(("%8d: enter\n", PTR(ptr)));
411
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000412 if (pattern[0] == SRE_OP_INFO) {
413 /* optimization info block */
414 /* args: <1=skip> <2=flags> <3=min> ... */
415 if (pattern[3] && (end - ptr) < pattern[3]) {
416 TRACE(("reject (got %d chars, need %d)\n",
417 (end - ptr), pattern[3]));
418 return 0;
419 }
420 pattern += pattern[1] + 1;
421 }
422
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000423 stackbase = stack = state->stackbase;
424 lastmark = state->lastmark;
425
426 retry:
Guido van Rossumb700df92000-03-31 14:59:30 +0000427
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000428 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000429
430 switch (*pattern++) {
431
432 case SRE_OP_FAILURE:
433 /* immediate failure */
434 TRACE(("%8d: failure\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000435 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000436
437 case SRE_OP_SUCCESS:
438 /* end of pattern */
439 TRACE(("%8d: success\n", PTR(ptr)));
440 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000441 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000442
443 case SRE_OP_AT:
444 /* match at given position */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000445 /* args: <at> */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000446 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
Guido van Rossumb700df92000-03-31 14:59:30 +0000447 if (!SRE_AT(state, ptr, *pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000448 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000449 pattern++;
450 break;
451
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000452 case SRE_OP_CATEGORY:
453 /* match at given category */
454 /* args: <category> */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000455 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
456 *ptr, *pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000457 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
458 goto failure;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000459 TRACE(("%8d: category ok\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000460 pattern++;
461 ptr++;
462 break;
463
Guido van Rossumb700df92000-03-31 14:59:30 +0000464 case SRE_OP_LITERAL:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000465 /* match literal string */
Guido van Rossumb700df92000-03-31 14:59:30 +0000466 /* args: <code> */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000467 TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
468 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000469 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000470 pattern++;
471 ptr++;
472 break;
473
474 case SRE_OP_NOT_LITERAL:
475 /* match anything that is not literal character */
476 /* args: <code> */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000477 TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
478 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000479 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000480 pattern++;
481 ptr++;
482 break;
483
484 case SRE_OP_ANY:
485 /* match anything */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000486 TRACE(("%8d: anything\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000487 if (ptr >= end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000488 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000489 ptr++;
490 break;
491
492 case SRE_OP_IN:
493 /* match set member (or non_member) */
494 /* args: <skip> <set> */
495 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
496 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000497 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000498 pattern += pattern[0];
499 ptr++;
500 break;
501
502 case SRE_OP_GROUP:
503 /* match backreference */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000504 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000505 i = pattern[0];
506 {
Guido van Rossumb700df92000-03-31 14:59:30 +0000507 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
508 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
509 if (!p || !e || e < p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000510 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000511 while (p < e) {
512 if (ptr >= end || *ptr != *p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000513 goto failure;
514 p++; ptr++;
515 }
516 }
517 pattern++;
518 break;
519
520 case SRE_OP_GROUP_IGNORE:
521 /* match backreference */
522 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
523 i = pattern[0];
524 {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000525 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
526 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000527 if (!p || !e || e < p)
528 goto failure;
529 while (p < e) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000530 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000531 state->lower(*ptr) != state->lower(*p))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000532 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000533 p++; ptr++;
534 }
535 }
536 pattern++;
537 break;
538
539 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000540 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000541 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000542 state->lower(*ptr) != state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000543 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000544 pattern++;
545 ptr++;
546 break;
547
548 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000549 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000550 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000551 state->lower(*ptr) == state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000552 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000553 pattern++;
554 ptr++;
555 break;
556
557 case SRE_OP_IN_IGNORE:
558 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
559 if (ptr >= end
Fredrik Lundh0640e112000-06-30 13:55:15 +0000560 || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000561 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000562 pattern += pattern[0];
563 ptr++;
564 break;
565
566 case SRE_OP_MARK:
567 /* set mark */
568 /* args: <mark> */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000569 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
570 if (state->lastmark < pattern[0])
571 state->lastmark = pattern[0];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000572 if (!mark) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000573 mark = mark_copy;
574 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000575 }
576 state->mark[pattern[0]] = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000577 pattern++;
578 break;
579
580 case SRE_OP_JUMP:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000581 case SRE_OP_INFO:
Guido van Rossumb700df92000-03-31 14:59:30 +0000582 /* jump forward */
583 /* args: <skip> */
584 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
585 pattern += pattern[0];
586 break;
587
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000588 case SRE_OP_ASSERT:
589 /* assert subpattern */
Guido van Rossumb700df92000-03-31 14:59:30 +0000590 /* args: <skip> <pattern> */
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000591 TRACE(("%8d: assert subpattern\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000592 state->ptr = ptr;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000593 i = SRE_MATCH(state, pattern + 1);
594 if (i < 0)
595 return i;
596 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000597 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000598 pattern += pattern[0];
Guido van Rossumb700df92000-03-31 14:59:30 +0000599 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000600
601 case SRE_OP_ASSERT_NOT:
602 /* assert not subpattern */
603 /* args: <skip> <pattern> */
604 TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
605 state->ptr = ptr;
606 i = SRE_MATCH(state, pattern + 1);
607 if (i < 0)
608 return i;
609 if (i)
610 goto failure;
611 pattern += pattern[0];
612 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000613
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000614#if 0
Guido van Rossumb700df92000-03-31 14:59:30 +0000615 case SRE_OP_MAX_REPEAT_ONE:
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000616 /* match repeated sequence (maximizing regexp) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000617
618 /* this operator only works if the repeated item is
619 exactly one character wide, and we're not already
620 collecting backtracking points. for other cases,
621 use the MAX_REPEAT operator instead */
622
Guido van Rossumb700df92000-03-31 14:59:30 +0000623 /* args: <skip> <min> <max> <step> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000624 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
625 pattern[1], pattern[2]));
626
627 count = 0;
628
629 if (pattern[3] == SRE_OP_ANY) {
630 /* repeated wildcard. skip to the end of the target
631 string, and backtrack from there */
632 /* FIXME: must look for line endings */
633 if (ptr + pattern[1] > end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000634 goto failure; /* cannot match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000635 count = pattern[2];
636 if (count > end - ptr)
637 count = end - ptr;
638 ptr += count;
639
640 } else if (pattern[3] == SRE_OP_LITERAL) {
641 /* repeated literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000642 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000643 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000644 if (ptr >= end || (SRE_CODE) ptr[0] != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000645 break;
646 ptr++;
647 count++;
648 }
649
650 } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
651 /* repeated literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000652 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000653 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000654 if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000655 break;
656 ptr++;
657 count++;
658 }
659
660 } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
661 /* repeated non-literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000662 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000663 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000664 if (ptr >= end || (SRE_CODE) ptr[0] == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000665 break;
666 ptr++;
667 count++;
668 }
669
670 } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
671 /* repeated non-literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000672 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000673 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000674 if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000675 break;
676 ptr++;
677 count++;
678 }
679
680 } else if (pattern[3] == SRE_OP_IN) {
681 /* repeated set */
682 while (count < (int) pattern[2]) {
683 if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
684 break;
685 ptr++;
686 count++;
687 }
688
689 } else {
690 /* repeated single character pattern */
691 state->ptr = ptr;
692 while (count < (int) pattern[2]) {
693 i = SRE_MATCH(state, pattern + 3);
694 if (i < 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000695 return i;
696 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000697 break;
698 count++;
699 }
700 state->ptr = ptr;
701 ptr += count;
702 }
703
704 /* when we arrive here, count contains the number of
705 matches, and ptr points to the tail of the target
706 string. check if the rest of the pattern matches, and
707 backtrack if not. */
708
Guido van Rossumb700df92000-03-31 14:59:30 +0000709 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
710
711 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000712 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000713
714 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
715 /* tail is empty. we're finished */
716 TRACE(("%8d: tail is empty\n", PTR(ptr)));
717 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000718 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000719
720 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721 /* tail starts with a literal. skip positions where
722 the rest of the pattern cannot possibly match */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000723 SRE_CODE chr = pattern[pattern[0]+1];
Guido van Rossumb700df92000-03-31 14:59:30 +0000724 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
725 for (;;) {
726 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
727 while (count >= (int) pattern[1] &&
728 (ptr >= end || *ptr != chr)) {
729 ptr--;
730 count--;
731 }
732 TRACE(("%8d: check tail\n", PTR(ptr)));
733 if (count < (int) pattern[1])
734 break;
735 state->ptr = ptr;
736 i = SRE_MATCH(state, pattern + pattern[0]);
737 if (i > 0) {
738 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000739 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000740 }
741 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
742 ptr--;
743 count--;
744 }
745
746 } else {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000747 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +0000748 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
749 while (count >= (int) pattern[1]) {
750 state->ptr = ptr;
751 i = SRE_MATCH(state, pattern + pattern[0]);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000752 if (i < 0)
753 return i;
754 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000755 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000756 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000757 }
758 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
759 ptr--;
760 count--;
761 }
762 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000763 goto failure;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000764#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000765
766 case SRE_OP_MAX_REPEAT:
767 /* match repeated sequence (maximizing regexp). repeated
768 group should end with a MAX_UNTIL code */
769
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000770 /* args: <skip> <min> <max> <item> */
771
772 TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
Guido van Rossumb700df92000-03-31 14:59:30 +0000773 pattern[1], pattern[2]));
774
775 count = 0;
776 state->ptr = ptr;
777
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000778 /* match minimum number of items */
779 while (count < (int) pattern[1]) {
780 i = SRE_MATCH(state, pattern + 3);
781 if (i < 0)
782 return i;
783 if (!i)
784 goto failure;
785 if (state->ptr == ptr) {
786 /* if the match was successful but empty, set the
787 count to max and terminate the scanning loop */
788 count = (int) pattern[2];
789 break;
790 }
791 count++;
792 ptr = state->ptr;
793 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000794
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000795 TRACE(("%8d: found %d leading items\n", PTR(ptr), count));
Guido van Rossumb700df92000-03-31 14:59:30 +0000796
797 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000798 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000799
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000800 /* match maximum number of items, pushing alternate end
801 points to the stack */
Guido van Rossumb700df92000-03-31 14:59:30 +0000802
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000803 while (pattern[2] == 32767 || count < (int) pattern[2]) {
804 state->stackbase = stack;
805 i = SRE_MATCH(state, pattern + 3);
806 state->stackbase = stackbase; /* rewind */
807 if (i < 0)
808 return i;
809 if (!i)
810 break;
811 if (state->ptr == ptr) {
812 count = (int) pattern[2];
813 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000814 }
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000815 /* this position was valid; add it to the retry
816 stack */
817 if (stack >= state->stacksize) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000818 i = stack_extend(state, stack + 1,
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000819 stackbase + pattern[2]);
820 if (i < 0)
821 return i; /* out of memory */
822 }
823 TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
824 state->stack[stack].ptr = ptr;
825 state->stack[stack].pattern = pattern + pattern[0];
826 stack++;
827 /* move forward */
828 ptr = state->ptr;
829 count++;
Guido van Rossumb700df92000-03-31 14:59:30 +0000830 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000831
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000832 /* when we get here, count is the number of successful
833 matches, and ptr points to the tail. */
Guido van Rossumb700df92000-03-31 14:59:30 +0000834
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000835 TRACE(("%8d: skip +%d\n", PTR(ptr), pattern[0]));
836
837 pattern += pattern[0];
838 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000839
840 case SRE_OP_MIN_REPEAT:
841 /* match repeated sequence (minimizing regexp) */
842 TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
843 pattern[1], pattern[2]));
844 count = 0;
845 state->ptr = ptr;
846 /* match minimum number of items */
847 while (count < (int) pattern[1]) {
848 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000849 if (i < 0)
850 return i;
851 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000852 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000853 count++;
854 }
855 /* move forward until the tail matches. */
856 while (count <= (int) pattern[2]) {
857 ptr = state->ptr;
858 i = SRE_MATCH(state, pattern + pattern[0]);
859 if (i > 0) {
860 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000861 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000862 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000863 state->ptr = ptr; /* backtrack */
864 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000865 if (i < 0)
866 return i;
867 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000868 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000869 count++;
870 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000871 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000872
Guido van Rossumb700df92000-03-31 14:59:30 +0000873 case SRE_OP_BRANCH:
874 /* match one of several subpatterns */
875 /* format: <branch> <size> <head> ... <null> <tail> */
876 TRACE(("%8d: branch\n", PTR(ptr)));
877 while (*pattern) {
878 if (pattern[1] != SRE_OP_LITERAL ||
Fredrik Lundh0640e112000-06-30 13:55:15 +0000879 (ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000880 TRACE(("%8d: branch check\n", PTR(ptr)));
881 state->ptr = ptr;
882 i = SRE_MATCH(state, pattern + 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000883 if (i < 0)
884 return i;
885 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000886 TRACE(("%8d: branch succeeded\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000887 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000888 }
889 }
890 pattern += *pattern;
891 }
892 TRACE(("%8d: branch failed\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000893 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000894
895 case SRE_OP_REPEAT:
896 /* TEMPLATE: match repeated sequence (no backtracking) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000897 /* args: <skip> <min> <max> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000898 TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
899 count = 0;
900 state->ptr = ptr;
901 while (count < (int) pattern[2]) {
902 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000903 if (i < 0)
904 return i;
905 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000906 break;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000907 if (state->ptr == ptr) {
908 count = (int) pattern[2];
909 break;
910 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000911 count++;
912 }
913 if (count <= (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000914 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000915 TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
916 pattern += pattern[0];
917 ptr = state->ptr;
918 break;
919
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000920 default:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000921 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000922 return SRE_ERROR_ILLEGAL;
923 }
924 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000925
926 failure:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000927 if (stack-- > stackbase) {
928 ptr = state->stack[stack].ptr;
929 pattern = state->stack[stack].pattern;
930 TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
931 goto retry;
932 }
933 TRACE(("%8d: leave (failure)\n", PTR(ptr)));
934 state->stackbase = stackbase;
935 state->lastmark = lastmark;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000936 if (mark)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000937 memcpy(state->mark, mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000938 return 0;
939
940 success:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000941 TRACE(("%8d: leave (success)\n", PTR(ptr)));
942 state->stackbase = stackbase;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000943 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000944}
945
946LOCAL(int)
947SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
948{
949 SRE_CHAR* ptr = state->start;
950 SRE_CHAR* end = state->end;
951 int status = 0;
Fredrik Lundh80946112000-06-29 18:03:25 +0000952 int prefix_len = 0;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000953 SRE_CODE* prefix;
954 SRE_CODE* overlap;
955 int literal = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000956
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000957 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000958 /* optimization info block */
959 /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
960
961 if (pattern[3] > 0) {
962 /* adjust end point (but make sure we leave at least one
963 character in there) */
964 end -= pattern[3]-1;
965 if (end <= ptr)
966 end = ptr+1;
967 }
968
969 literal = pattern[2];
970
971 prefix = pattern + 6;
972 prefix_len = pattern[5];
973
974 overlap = prefix + prefix_len - 1;
975
976 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000977 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000978
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000979#if defined(USE_FAST_SEARCH)
980 if (prefix_len > 1) {
981 /* pattern starts with a known prefix. use the overlap
982 table to skip forward as fast as we possibly can */
983 int i = 0;
984 end = state->end;
985 while (ptr < end) {
986 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000987 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000988 if (!i)
989 break;
990 else
991 i = overlap[i];
992 } else {
993 if (++i == prefix_len) {
994 /* found a potential match */
995 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
996 state->start = ptr - prefix_len + 1;
997 state->ptr = ptr + 1;
998 if (literal)
999 return 1; /* all of it */
1000 status = SRE_MATCH(state, pattern + 2*prefix_len);
1001 if (status != 0)
1002 return status;
1003 /* close but no cigar -- try again */
1004 i = overlap[i];
1005 }
1006 break;
1007 }
1008
1009 }
1010 ptr++;
1011 }
1012 return 0;
1013 }
1014#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001015
Guido van Rossumb700df92000-03-31 14:59:30 +00001016 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001017 /* pattern starts with a literal character. this is used for
1018 short prefixes, and if fast search is disabled*/
Fredrik Lundh0640e112000-06-30 13:55:15 +00001019 SRE_CODE chr = pattern[1];
Guido van Rossumb700df92000-03-31 14:59:30 +00001020 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001021 while (ptr < end && (SRE_CODE) ptr[0] != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +00001022 ptr++;
1023 if (ptr == end)
1024 return 0;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001025 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001026 state->start = ptr;
1027 state->ptr = ++ptr;
1028 status = SRE_MATCH(state, pattern + 2);
1029 if (status != 0)
1030 break;
1031 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001032 } else
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001033 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +00001034 while (ptr <= end) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001035 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001036 state->start = state->ptr = ptr++;
1037 status = SRE_MATCH(state, pattern);
1038 if (status != 0)
1039 break;
1040 }
1041
1042 return status;
1043}
1044
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001045#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001046
1047/* -------------------------------------------------------------------- */
1048/* factories and destructors */
1049
1050/* see sre.h for object declarations */
1051
1052staticforward PyTypeObject Pattern_Type;
1053staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001054staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001055
1056static PyObject *
1057_compile(PyObject* self_, PyObject* args)
1058{
1059 /* "compile" pattern descriptor to pattern object */
1060
1061 PatternObject* self;
1062
1063 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001064 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001065 PyObject* code;
1066 int groups = 0;
1067 PyObject* groupindex = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001068 if (!PyArg_ParseTuple(args, "OiO!|iO", &pattern, &flags,
1069 &PyString_Type, &code,
1070 &groups, &groupindex))
Guido van Rossumb700df92000-03-31 14:59:30 +00001071 return NULL;
1072
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001073 self = PyObject_NEW(PatternObject, &Pattern_Type);
Guido van Rossumb700df92000-03-31 14:59:30 +00001074 if (self == NULL)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001075
Guido van Rossumb700df92000-03-31 14:59:30 +00001076 return NULL;
1077
1078 Py_INCREF(pattern);
1079 self->pattern = pattern;
1080
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001081 self->flags = flags;
1082
Guido van Rossumb700df92000-03-31 14:59:30 +00001083 Py_INCREF(code);
1084 self->code = code;
1085
1086 self->groups = groups;
1087
1088 Py_XINCREF(groupindex);
1089 self->groupindex = groupindex;
1090
1091 return (PyObject*) self;
1092}
1093
1094static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001095sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001096{
1097 return Py_BuildValue("i", sizeof(SRE_CODE));
1098}
1099
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001100static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001101sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001102{
1103 int character, flags;
1104 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1105 return NULL;
1106 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001107 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001108#if defined(HAVE_UNICODE)
1109 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001110 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001111#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001112 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001113}
1114
Guido van Rossumb700df92000-03-31 14:59:30 +00001115LOCAL(PyObject*)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001116state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001117{
1118 /* prepare state object */
1119
1120 PyBufferProcs *buffer;
1121 int i, count;
1122 void* ptr;
1123
1124 PyObject* string;
1125 int start = 0;
1126 int end = INT_MAX;
1127 if (!PyArg_ParseTuple(args, "O|ii", &string, &start, &end))
1128 return NULL;
1129
1130 /* get pointer to string buffer */
1131 buffer = string->ob_type->tp_as_buffer;
1132 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1133 buffer->bf_getsegcount(string, NULL) != 1) {
1134 PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
1135 return NULL;
1136 }
1137
1138 /* determine buffer size */
1139 count = buffer->bf_getreadbuffer(string, 0, &ptr);
1140 if (count < 0) {
1141 /* sanity check */
1142 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1143 return NULL;
1144 }
1145
1146 /* determine character size */
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001147#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001148 state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001149#else
1150 state->charsize = 1;
1151#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001152
1153 count /= state->charsize;
1154
1155 /* adjust boundaries */
1156 if (start < 0)
1157 start = 0;
1158 else if (start > count)
1159 start = count;
1160
1161 if (end < 0)
1162 end = 0;
1163 else if (end > count)
1164 end = count;
1165
1166 state->beginning = ptr;
1167
1168 state->start = (void*) ((char*) ptr + start * state->charsize);
1169 state->end = (void*) ((char*) ptr + end * state->charsize);
1170
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001171 state->lastmark = 0;
1172
Guido van Rossumb700df92000-03-31 14:59:30 +00001173 /* FIXME: dynamic! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001174 for (i = 0; i < SRE_MARK_SIZE; i++)
Guido van Rossumb700df92000-03-31 14:59:30 +00001175 state->mark[i] = NULL;
1176
1177 state->stack = NULL;
1178 state->stackbase = 0;
1179 state->stacksize = 0;
1180
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001181 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001182 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001183#if defined(HAVE_UNICODE)
1184 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001185 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001186#endif
1187 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001188 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001189
Guido van Rossumb700df92000-03-31 14:59:30 +00001190 return string;
1191}
1192
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001193LOCAL(void)
1194state_fini(SRE_STATE* state)
1195{
1196 stack_free(state);
1197}
1198
1199LOCAL(PyObject*)
1200state_getslice(SRE_STATE* state, int index, PyObject* string)
1201{
1202 index = (index - 1) * 2;
1203
1204 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1205 Py_INCREF(Py_None);
1206 return Py_None;
1207 }
1208
1209 return PySequence_GetSlice(
1210 string,
1211 ((char*)state->mark[index] - (char*)state->beginning) /
1212 state->charsize,
1213 ((char*)state->mark[index+1] - (char*)state->beginning) /
1214 state->charsize
1215 );
1216}
1217
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001218static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001219pattern_new_match(PatternObject* pattern, SRE_STATE* state,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001220 PyObject* string, int status)
1221{
1222 /* create match object (from state object) */
1223
1224 MatchObject* match;
1225 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001226 char* base;
1227 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001228
1229 if (status > 0) {
1230
1231 /* create match object (with room for extra group marks) */
1232 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2*pattern->groups);
1233 if (match == NULL)
1234 return NULL;
1235
1236 Py_INCREF(pattern);
1237 match->pattern = pattern;
1238
1239 Py_INCREF(string);
1240 match->string = string;
1241
1242 match->groups = pattern->groups+1;
1243
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001244 base = (char*) state->beginning;
1245 n = state->charsize;
1246
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001247 /* group zero */
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001248 match->mark[0] = ((char*) state->start - base) / n;
1249 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001250
1251 /* fill in the rest of the groups */
1252 for (i = j = 0; i < pattern->groups; i++, j+=2)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001253 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1254 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1255 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001256 } else
1257 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1258
1259 return (PyObject*) match;
1260
1261 } else if (status < 0) {
1262
1263 /* internal error */
1264 PyErr_SetString(
1265 PyExc_RuntimeError, "internal error in regular expression engine"
1266 );
1267 return NULL;
1268
1269 }
1270
1271 Py_INCREF(Py_None);
1272 return Py_None;
1273}
1274
1275static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001276pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001277{
1278 /* create search state object */
1279
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001280 ScannerObject* self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001281 PyObject* string;
1282
1283 /* create match object (with room for extra group marks) */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001284 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001285 if (self == NULL)
1286 return NULL;
1287
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001288 string = state_init(&self->state, pattern, args);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001289 if (!string) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001290 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001291 return NULL;
1292 }
1293
1294 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001295 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001296
1297 Py_INCREF(string);
1298 self->string = string;
1299
1300 return (PyObject*) self;
1301}
1302
Guido van Rossumb700df92000-03-31 14:59:30 +00001303static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001304pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001305{
1306 Py_XDECREF(self->code);
1307 Py_XDECREF(self->pattern);
1308 Py_XDECREF(self->groupindex);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001309 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001310}
1311
1312static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001313pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001314{
1315 SRE_STATE state;
1316 PyObject* string;
1317 int status;
1318
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001319 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001320 if (!string)
1321 return NULL;
1322
1323 state.ptr = state.start;
1324
1325 if (state.charsize == 1) {
1326 status = sre_match(&state, PatternObject_GetCode(self));
1327 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001328#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001329 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001330#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001331 }
1332
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001333 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001334
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001335 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001336}
1337
1338static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001339pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001340{
1341 SRE_STATE state;
1342 PyObject* string;
1343 int status;
1344
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001345 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001346 if (!string)
1347 return NULL;
1348
1349 if (state.charsize == 1) {
1350 status = sre_search(&state, PatternObject_GetCode(self));
1351 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001352#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001353 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001354#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001355 }
1356
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001357 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001358
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001359 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001360}
1361
1362static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001363call(char* function, PyObject* args)
1364{
1365 PyObject* name;
1366 PyObject* module;
1367 PyObject* func;
1368 PyObject* result;
1369
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001370 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001371 if (!name)
1372 return NULL;
1373 module = PyImport_Import(name);
1374 Py_DECREF(name);
1375 if (!module)
1376 return NULL;
1377 func = PyObject_GetAttrString(module, function);
1378 Py_DECREF(module);
1379 if (!func)
1380 return NULL;
1381 result = PyObject_CallObject(func, args);
1382 Py_DECREF(func);
1383 Py_DECREF(args);
1384 return result;
1385}
1386
1387static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001388pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001389{
1390 PyObject* template;
1391 PyObject* string;
1392 PyObject* count;
1393 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1394 return NULL;
1395
1396 /* delegate to Python code */
1397 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1398}
1399
1400static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001401pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001402{
1403 PyObject* template;
1404 PyObject* string;
1405 PyObject* count;
1406 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1407 return NULL;
1408
1409 /* delegate to Python code */
1410 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1411}
1412
1413static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001414pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001415{
1416 PyObject* string;
1417 PyObject* maxsplit;
1418 if (!PyArg_ParseTuple(args, "OO", &string, &maxsplit))
1419 return NULL;
1420
1421 /* delegate to Python code */
1422 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1423}
1424
1425static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001426pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001427{
Guido van Rossumb700df92000-03-31 14:59:30 +00001428 SRE_STATE state;
1429 PyObject* string;
1430 PyObject* list;
1431 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001432 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001433
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001434 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001435 if (!string)
1436 return NULL;
1437
1438 list = PyList_New(0);
1439
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001440 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001441
1442 PyObject* item;
1443
1444 state.ptr = state.start;
1445
1446 if (state.charsize == 1) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001447 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +00001448 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001449#if defined(HAVE_UNICODE)
1450 status = sre_usearch(&state, PatternObject_GetCode(self));
1451#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001452 }
1453
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001454 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001455
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001456 /* don't bother to build a match object */
1457 switch (self->groups) {
1458 case 0:
1459 item = PySequence_GetSlice(
1460 string,
1461 ((char*) state.start - (char*) state.beginning) /
1462 state.charsize,
1463 ((char*) state.ptr - (char*) state.beginning) /
1464 state.charsize);
1465 if (!item)
1466 goto error;
1467 break;
1468 case 1:
1469 item = state_getslice(&state, 1, string);
1470 if (!item)
1471 goto error;
1472 break;
1473 default:
1474 item = PyTuple_New(self->groups);
1475 if (!item)
1476 goto error;
1477 for (i = 0; i < self->groups; i++) {
1478 PyObject* o = state_getslice(&state, i+1, string);
1479 if (!o) {
1480 Py_DECREF(item);
1481 goto error;
1482 }
1483 PyTuple_SET_ITEM(item, i, o);
1484 }
1485 break;
1486 }
1487
1488 if (PyList_Append(list, item) < 0) {
1489 Py_DECREF(item);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001490 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001491 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001492
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001493 if (state.ptr == state.start)
1494 state.start = (void*) ((char*) state.ptr + state.charsize);
1495 else
1496 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001497
1498 } else {
1499
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001500 if (status == 0)
1501 break;
1502
Guido van Rossumb700df92000-03-31 14:59:30 +00001503 /* internal error */
1504 PyErr_SetString(
1505 PyExc_RuntimeError,
1506 "internal error in regular expression engine"
1507 );
1508 goto error;
1509
1510 }
1511 }
1512
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001513 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001514 return list;
1515
1516error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001517 Py_DECREF(list);
1518 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001519 return NULL;
1520
1521}
1522
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001523static PyMethodDef pattern_methods[] = {
1524 {"match", (PyCFunction) pattern_match, 1},
1525 {"search", (PyCFunction) pattern_search, 1},
1526 {"sub", (PyCFunction) pattern_sub, 1},
1527 {"subn", (PyCFunction) pattern_subn, 1},
1528 {"split", (PyCFunction) pattern_split, 1},
1529 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001530 /* experimental */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001531 {"scanner", (PyCFunction) pattern_scanner, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001532 {NULL, NULL}
1533};
1534
1535static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001536pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001537{
1538 PyObject* res;
1539
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001540 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001541
1542 if (res)
1543 return res;
1544
1545 PyErr_Clear();
1546
1547 /* attributes */
1548 if (!strcmp(name, "pattern")) {
1549 Py_INCREF(self->pattern);
1550 return self->pattern;
1551 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001552
1553 if (!strcmp(name, "flags"))
1554 return Py_BuildValue("i", self->flags);
1555
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001556 if (!strcmp(name, "groups"))
1557 return Py_BuildValue("i", self->groups);
1558
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001559 if (!strcmp(name, "groupindex") && self->groupindex) {
1560 Py_INCREF(self->groupindex);
1561 return self->groupindex;
1562 }
1563
Guido van Rossumb700df92000-03-31 14:59:30 +00001564 PyErr_SetString(PyExc_AttributeError, name);
1565 return NULL;
1566}
1567
1568statichere PyTypeObject Pattern_Type = {
1569 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001570 0, "SRE_Pattern", sizeof(PatternObject), 0,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001571 (destructor)pattern_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001572 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001573 (getattrfunc)pattern_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001574};
1575
1576/* -------------------------------------------------------------------- */
1577/* match methods */
1578
1579static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001580match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001581{
1582 Py_XDECREF(self->string);
1583 Py_DECREF(self->pattern);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001584 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001585}
1586
1587static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001588match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001589{
1590 if (index < 0 || index >= self->groups) {
1591 /* raise IndexError if we were given a bad group number */
1592 PyErr_SetString(
1593 PyExc_IndexError,
1594 "no such group"
1595 );
1596 return NULL;
1597 }
1598
1599 if (self->string == Py_None || self->mark[index+index] < 0) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001600 /* return default value if the string or group is undefined */
1601 Py_INCREF(def);
1602 return def;
Guido van Rossumb700df92000-03-31 14:59:30 +00001603 }
1604
1605 return PySequence_GetSlice(
1606 self->string, self->mark[index+index], self->mark[index+index+1]
1607 );
1608}
1609
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001610static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001611match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001612{
1613 if (!PyInt_Check(index) && self->pattern->groupindex != NULL) {
1614 /* FIXME: resource leak? */
1615 index = PyObject_GetItem(self->pattern->groupindex, index);
1616 if (!index)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001617 return -1;
Guido van Rossumb700df92000-03-31 14:59:30 +00001618 }
1619
1620 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001621 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001622
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001623 return -1;
1624}
1625
1626static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001627match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001628{
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001629 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001630}
1631
1632static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001633match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001634{
1635 PyObject* result;
1636 int i, size;
1637
1638 size = PyTuple_GET_SIZE(args);
1639
1640 switch (size) {
1641 case 0:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001642 result = match_getslice(self, Py_False, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001643 break;
1644 case 1:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001645 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001646 break;
1647 default:
1648 /* fetch multiple items */
1649 result = PyTuple_New(size);
1650 if (!result)
1651 return NULL;
1652 for (i = 0; i < size; i++) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001653 PyObject* item = match_getslice(
1654 self, PyTuple_GET_ITEM(args, i), Py_None
1655 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001656 if (!item) {
1657 Py_DECREF(result);
1658 return NULL;
1659 }
1660 PyTuple_SET_ITEM(result, i, item);
1661 }
1662 break;
1663 }
1664 return result;
1665}
1666
1667static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001668match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001669{
1670 PyObject* result;
1671 int index;
1672
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001673 PyObject* def = Py_None;
1674 if (!PyArg_ParseTuple(args, "|O", &def))
1675 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001676
Guido van Rossumb700df92000-03-31 14:59:30 +00001677 result = PyTuple_New(self->groups-1);
1678 if (!result)
1679 return NULL;
1680
1681 for (index = 1; index < self->groups; index++) {
1682 PyObject* item;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001683 item = match_getslice_by_index(self, index, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001684 if (!item) {
1685 Py_DECREF(result);
1686 return NULL;
1687 }
1688 PyTuple_SET_ITEM(result, index-1, item);
1689 }
1690
1691 return result;
1692}
1693
1694static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001695match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001696{
1697 PyObject* result;
1698 PyObject* keys;
1699 int index;
1700
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001701 PyObject* def = Py_None;
1702 if (!PyArg_ParseTuple(args, "|O", &def))
1703 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001704
Guido van Rossumb700df92000-03-31 14:59:30 +00001705 result = PyDict_New();
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001706 if (!result || !self->pattern->groupindex)
Guido van Rossumb700df92000-03-31 14:59:30 +00001707 return result;
1708
1709 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001710 if (!keys) {
1711 Py_DECREF(result);
Guido van Rossumb700df92000-03-31 14:59:30 +00001712 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001713 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001714
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001715 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001716 PyObject* key;
1717 PyObject* item;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001718 key = PyList_GET_ITEM(keys, index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001719 if (!key) {
1720 Py_DECREF(keys);
1721 Py_DECREF(result);
1722 return NULL;
1723 }
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001724 item = match_getslice(self, key, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001725 if (!item) {
1726 Py_DECREF(key);
1727 Py_DECREF(keys);
1728 Py_DECREF(result);
1729 return NULL;
1730 }
1731 /* FIXME: <fl> this can fail, right? */
1732 PyDict_SetItem(result, key, item);
1733 }
1734
1735 Py_DECREF(keys);
1736
1737 return result;
1738}
1739
1740static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001741match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001742{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001743 int index;
1744
1745 PyObject* index_ = Py_False;
1746 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001747 return NULL;
1748
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001749 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001750
Guido van Rossumb700df92000-03-31 14:59:30 +00001751 if (index < 0 || index >= self->groups) {
1752 PyErr_SetString(
1753 PyExc_IndexError,
1754 "no such group"
1755 );
1756 return NULL;
1757 }
1758
1759 if (self->mark[index*2] < 0) {
1760 Py_INCREF(Py_None);
1761 return Py_None;
1762 }
1763
1764 return Py_BuildValue("i", self->mark[index*2]);
1765}
1766
1767static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001768match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001769{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001770 int index;
1771
1772 PyObject* index_ = Py_False;
1773 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001774 return NULL;
1775
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001776 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001777
Guido van Rossumb700df92000-03-31 14:59:30 +00001778 if (index < 0 || index >= self->groups) {
1779 PyErr_SetString(
1780 PyExc_IndexError,
1781 "no such group"
1782 );
1783 return NULL;
1784 }
1785
1786 if (self->mark[index*2] < 0) {
1787 Py_INCREF(Py_None);
1788 return Py_None;
1789 }
1790
1791 return Py_BuildValue("i", self->mark[index*2+1]);
1792}
1793
1794static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001795match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001796{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001797 int index;
1798
1799 PyObject* index_ = Py_False;
1800 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001801 return NULL;
1802
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001803 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001804
Guido van Rossumb700df92000-03-31 14:59:30 +00001805 if (index < 0 || index >= self->groups) {
1806 PyErr_SetString(
1807 PyExc_IndexError,
1808 "no such group"
1809 );
1810 return NULL;
1811 }
1812
1813 if (self->mark[index*2] < 0) {
1814 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001815 Py_INCREF(Py_None);
1816 return Py_BuildValue("OO", Py_None, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001817 }
1818
1819 return Py_BuildValue("ii", self->mark[index*2], self->mark[index*2+1]);
1820}
1821
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001822static PyMethodDef match_methods[] = {
1823 {"group", (PyCFunction) match_group, 1},
1824 {"start", (PyCFunction) match_start, 1},
1825 {"end", (PyCFunction) match_end, 1},
1826 {"span", (PyCFunction) match_span, 1},
1827 {"groups", (PyCFunction) match_groups, 1},
1828 {"groupdict", (PyCFunction) match_groupdict, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001829 {NULL, NULL}
1830};
1831
1832static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001833match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001834{
1835 PyObject* res;
1836
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001837 res = Py_FindMethod(match_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001838 if (res)
1839 return res;
1840
1841 PyErr_Clear();
1842
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001843 /* attributes */
Guido van Rossumb700df92000-03-31 14:59:30 +00001844 if (!strcmp(name, "string")) {
1845 Py_INCREF(self->string);
1846 return self->string;
1847 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001848
Guido van Rossumb700df92000-03-31 14:59:30 +00001849 if (!strcmp(name, "re")) {
1850 Py_INCREF(self->pattern);
1851 return (PyObject*) self->pattern;
1852 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001853
Guido van Rossumb700df92000-03-31 14:59:30 +00001854 if (!strcmp(name, "pos"))
1855 return Py_BuildValue("i", 0); /* FIXME */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001856
Guido van Rossumb700df92000-03-31 14:59:30 +00001857 if (!strcmp(name, "endpos"))
1858 return Py_BuildValue("i", 0); /* FIXME */
1859
1860 PyErr_SetString(PyExc_AttributeError, name);
1861 return NULL;
1862}
1863
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
1866
1867statichere PyTypeObject Match_Type = {
1868 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001869 0, "SRE_Match",
Guido van Rossumb700df92000-03-31 14:59:30 +00001870 sizeof(MatchObject), /* size of basic object */
1871 sizeof(int), /* space for group item */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001872 (destructor)match_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001873 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001874 (getattrfunc)match_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001875};
1876
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001877/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001878/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001879
1880static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001881scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001882{
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001883 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001884 Py_DECREF(self->string);
1885 Py_DECREF(self->pattern);
1886 PyMem_DEL(self);
1887}
1888
1889static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001890scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001891{
1892 SRE_STATE* state = &self->state;
1893 PyObject* match;
1894 int status;
1895
1896 state->ptr = state->start;
1897
1898 if (state->charsize == 1) {
1899 status = sre_match(state, PatternObject_GetCode(self->pattern));
1900 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001901#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001902 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001903#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001904 }
1905
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001906 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001907 state, self->string, status);
1908
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001909 if (status == 0 || state->ptr == state->start)
1910 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001911 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001912 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001913
1914 return match;
1915}
1916
1917
1918static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001919scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001920{
1921 SRE_STATE* state = &self->state;
1922 PyObject* match;
1923 int status;
1924
1925 state->ptr = state->start;
1926
1927 if (state->charsize == 1) {
1928 status = sre_search(state, PatternObject_GetCode(self->pattern));
1929 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001930#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001931 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001932#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001933 }
1934
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001935 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001936 state, self->string, status);
1937
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001938 if (status == 0 || state->ptr == state->start)
1939 state->start = (void*) ((char*) state->ptr + state->charsize);
1940 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001941 state->start = state->ptr;
1942
1943 return match;
1944}
1945
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001946static PyMethodDef scanner_methods[] = {
1947 {"match", (PyCFunction) scanner_match, 0},
1948 {"search", (PyCFunction) scanner_search, 0},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001949 {NULL, NULL}
1950};
1951
1952static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001953scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001954{
1955 PyObject* res;
1956
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001957 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001958 if (res)
1959 return res;
1960
1961 PyErr_Clear();
1962
1963 /* attributes */
1964 if (!strcmp(name, "pattern")) {
1965 Py_INCREF(self->pattern);
1966 return self->pattern;
1967 }
1968
1969 PyErr_SetString(PyExc_AttributeError, name);
1970 return NULL;
1971}
1972
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001973statichere PyTypeObject Scanner_Type = {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001974 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001975 0, "SRE_Scanner",
1976 sizeof(ScannerObject), /* size of basic object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001977 0,
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001978 (destructor)scanner_dealloc, /*tp_dealloc*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001979 0, /*tp_print*/
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001980 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001981};
1982
Guido van Rossumb700df92000-03-31 14:59:30 +00001983static PyMethodDef _functions[] = {
1984 {"compile", _compile, 1},
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001985 {"getcodesize", sre_codesize, 1},
Fredrik Lundhb389df32000-06-29 12:48:37 +00001986 {"getlower", sre_getlower, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001987 {NULL, NULL}
1988};
1989
1990void
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001991#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00001992__declspec(dllexport)
1993#endif
1994init_sre()
1995{
1996 /* Patch object types */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001997 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001998 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001999
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002000 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00002001}
2002
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002003#endif /* !defined(SRE_RECURSIVE) */