blob: 206e8d0fe28f4ea2e2df9fc0eaf9b57df0a323f7 [file] [log] [blame]
/* -*- Mode: C; tab-width: 4 -*-
 *
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 99-10-24 fl created (based on existing template matcher code)
 * 99-11-13 fl added categories, branching, and more (0.2)
 * 99-11-16 fl some tweaks to compile on non-Windows platforms
 * 99-12-18 fl non-literals, generic maximizing repeat (0.3)
 * 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
 * 00-03-06 fl first alpha, sort of (0.5)
 * 00-03-14 fl removed most compatibility stuff (0.6)
 * 00-05-10 fl towards third alpha (0.8.2)
 * 00-05-13 fl added experimental scanner stuff (0.8.3)
 * 00-05-27 fl final bug hunt (0.8.4)
 * 00-06-21 fl less bugs, more taste (0.8.5)
 * 00-06-25 fl major changes to better deal with nested repeats (0.9)
 * 00-06-28 fl fixed findall (0.9.1)
 * 00-06-29 fl fixed split, added more scanner features (0.9.2)
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
29
30#ifndef SRE_RECURSIVE
31
Fredrik Lundh436c3d582000-06-29 08:58:44 +000032static char
Fredrik Lundh80946112000-06-29 18:03:25 +000033copyright[] = " SRE 0.9.2 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000034
35#include "Python.h"
36
37#include "sre.h"
38
Guido van Rossumb700df92000-03-31 14:59:30 +000039#if defined(HAVE_LIMITS_H)
40#include <limits.h>
41#else
42#define INT_MAX 2147483647
43#endif
44
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000045#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000046
Fredrik Lundh436c3d582000-06-29 08:58:44 +000047/* name of this module, minus the leading underscore */
48#define MODULE "sre"
49
Guido van Rossumb700df92000-03-31 14:59:30 +000050/* defining this one enables tracing */
51#undef DEBUG
52
Fredrik Lundh436c3d582000-06-29 08:58:44 +000053#if PY_VERSION_HEX >= 0x01060000
54/* defining this enables unicode support (default under 1.6) */
55#define HAVE_UNICODE
56#endif
57
Fredrik Lundh80946112000-06-29 18:03:25 +000058#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000059#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
60/* fastest possible local call under MSVC */
61#define LOCAL(type) static __inline type __fastcall
62#else
63#define LOCAL(type) static type
64#endif
65
66/* error codes */
67#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
68#define SRE_ERROR_MEMORY -9 /* out of memory */
69
Fredrik Lundh436c3d582000-06-29 08:58:44 +000070#if defined(DEBUG)
Guido van Rossumb700df92000-03-31 14:59:30 +000071#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000072#else
73#define TRACE(v)
74#endif
75
Fredrik Lundh436c3d582000-06-29 08:58:44 +000076#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Guido van Rossumb700df92000-03-31 14:59:30 +000077
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000078/* -------------------------------------------------------------------- */
79/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000080
Fredrik Lundh436c3d582000-06-29 08:58:44 +000081/* default character predicates (run sre_chars.py to regenerate tables) */
82
83#define SRE_DIGIT_MASK 1
84#define SRE_SPACE_MASK 2
85#define SRE_LINEBREAK_MASK 4
86#define SRE_ALNUM_MASK 8
87#define SRE_WORD_MASK 16
88
/* per-character flag table for codes 0..127.  each entry is the bitwise
   OR of the SRE_*_MASK bits that apply to that code (for example 25 =
   digit | alnum | word, 24 = alnum | word, 6 = space | linebreak) */
static char sre_char_info[128] = {
    /* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /* 0x40 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /* 0x60 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};
96
/* lowercase mapping for codes 0..127: 'A'..'Z' (65..90) map to
   'a'..'z' (97..122), every other code maps to itself */
static char sre_char_lower[128] = {
    /* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
                 8,   9,  10,  11,  12,  13,  14,  15,
    /* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
                24,  25,  26,  27,  28,  29,  30,  31,
    /* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
                40,  41,  42,  43,  44,  45,  46,  47,
    /* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
                56,  57,  58,  59,  60,  61,  62,  63,
    /* 0x40 */  64,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122,  91,  92,  93,  94,  95,
    /* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122, 123, 124, 125, 126, 127
};

/* default case folding: look codes below 128 up in sre_char_lower,
   and hand everything else back unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return sre_char_lower[ch];
}
111
/* flag bits recorded in sre_char_info for a character code; codes that
   fall outside the 128-entry table have no flags set.  note that the
   argument is evaluated more than once */
#define SRE_CHAR_FLAGS(ch)\
    ((ch) < 128 ? sre_char_info[(ch)] : 0)

/* each predicate yields the matching mask bit (nonzero) or 0 */
#define SRE_IS_DIGIT(ch)\
    (SRE_CHAR_FLAGS((ch)) & SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch)\
    (SRE_CHAR_FLAGS((ch)) & SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch)\
    (SRE_CHAR_FLAGS((ch)) & SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch)\
    (SRE_CHAR_FLAGS((ch)) & SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch)\
    (SRE_CHAR_FLAGS((ch)) & SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000122
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000123/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000124
/* locale-aware case folding: codes below 256 go through the C library's
   tolower(); anything larger is returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int) tolower((int) ch);
}
/* locale-dependent predicates: codes below 256 are classified by the C
   library's <ctype.h> functions, larger codes never match.  results are
   zero / nonzero, not necessarily 0 / 1 */
#define SRE_LOC_IS_DIGIT(ch)\
    ((ch) >= 256 ? 0 : isdigit((ch)))
#define SRE_LOC_IS_SPACE(ch)\
    ((ch) >= 256 ? 0 : isspace((ch)))
/* only a plain newline counts as a line break here */
#define SRE_LOC_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch)\
    ((ch) >= 256 ? 0 : isalnum((ch)))
/* word character: alphanumeric (see above) or underscore */
#define SRE_LOC_IS_WORD(ch)\
    (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
134
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000135/* unicode-specific character predicates */
136
137#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000138static unsigned int sre_lower_unicode(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000139{
140 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
141}
/* unicode predicates.  the first four forward to the corresponding
   Py_UNICODE_* macro after casting the argument to Py_UNICODE */
#define SRE_UNI_TO_LOWER(ch) Py_UNICODE_TOLOWER((Py_UNICODE)(ch))
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
/* NOTE(review): the alphanumeric test goes through the C library's
   isalnum() for codes below 256 and reports 0 for every larger code,
   so it is not a full unicode classification */
#define SRE_UNI_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
/* word character: alphanumeric per SRE_UNI_IS_ALNUM, or underscore.
   this is built on the unicode-family alnum test (the same way
   SRE_LOC_IS_WORD is built on SRE_LOC_IS_ALNUM) instead of on the
   ASCII-only SRE_IS_ALNUM table lookup */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
148#endif
149
Guido van Rossumb700df92000-03-31 14:59:30 +0000150LOCAL(int)
151sre_category(SRE_CODE category, unsigned int ch)
152{
153 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000154
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000155 case SRE_CATEGORY_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000156 return SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000157 case SRE_CATEGORY_NOT_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000158 return !SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000159 case SRE_CATEGORY_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000160 return SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000161 case SRE_CATEGORY_NOT_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000162 return !SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000163 case SRE_CATEGORY_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000164 return SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000165 case SRE_CATEGORY_NOT_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000166 return !SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000167 case SRE_CATEGORY_LINEBREAK:
168 return SRE_IS_LINEBREAK(ch);
169 case SRE_CATEGORY_NOT_LINEBREAK:
170 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000171
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000172 case SRE_CATEGORY_LOC_WORD:
173 return SRE_LOC_IS_WORD(ch);
174 case SRE_CATEGORY_LOC_NOT_WORD:
175 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176
177#if defined(HAVE_UNICODE)
178 case SRE_CATEGORY_UNI_DIGIT:
179 return SRE_UNI_IS_DIGIT(ch);
180 case SRE_CATEGORY_UNI_NOT_DIGIT:
181 return !SRE_UNI_IS_DIGIT(ch);
182 case SRE_CATEGORY_UNI_SPACE:
183 return SRE_UNI_IS_SPACE(ch);
184 case SRE_CATEGORY_UNI_NOT_SPACE:
185 return !SRE_UNI_IS_SPACE(ch);
186 case SRE_CATEGORY_UNI_WORD:
187 return SRE_UNI_IS_WORD(ch);
188 case SRE_CATEGORY_UNI_NOT_WORD:
189 return !SRE_UNI_IS_WORD(ch);
190 case SRE_CATEGORY_UNI_LINEBREAK:
191 return SRE_UNI_IS_LINEBREAK(ch);
192 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
193 return !SRE_UNI_IS_LINEBREAK(ch);
194#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000195 }
196 return 0;
197}
198
199/* helpers */
200
201LOCAL(int)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000202stack_free(SRE_STATE* state)
Guido van Rossumb700df92000-03-31 14:59:30 +0000203{
204 if (state->stack) {
205 TRACE(("release stack\n"));
206 free(state->stack);
207 state->stack = NULL;
208 }
209 state->stacksize = 0;
210 return 0;
211}
212
213static int /* shouldn't be LOCAL */
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000214stack_extend(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000215{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000216 SRE_STACK* stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000217 int stacksize;
218
219 /* grow the stack to a suitable size; we need at least lo entries,
220 at most hi entries. if for some reason hi is lower than lo, lo
221 wins */
222
223 stacksize = state->stacksize;
224
225 if (stacksize == 0) {
226 /* create new stack */
227 stacksize = 512;
228 if (stacksize < lo)
229 stacksize = lo;
230 else if (stacksize > hi)
231 stacksize = hi;
232 TRACE(("allocate stack %d\n", stacksize));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000233 stack = malloc(sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000234 } else {
235 /* grow the stack (typically by a factor of two) */
236 while (stacksize < lo)
237 stacksize = 2 * stacksize;
238 /* FIXME: <fl> could trim size if it's larger than lo, and
239 much larger than hi */
240 TRACE(("grow stack to %d\n", stacksize));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000241 stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000242 }
243
244 if (!stack) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000245 stack_free(state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000246 return SRE_ERROR_MEMORY;
247 }
248
249 state->stack = stack;
250 state->stacksize = stacksize;
251
252 return 0;
253}
254
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000255/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000256
257#define SRE_CHAR unsigned char
258#define SRE_AT sre_at
259#define SRE_MEMBER sre_member
260#define SRE_MATCH sre_match
261#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000262
263#if defined(HAVE_UNICODE)
264
Guido van Rossumb700df92000-03-31 14:59:30 +0000265#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000266#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000267#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000268
Guido van Rossumb700df92000-03-31 14:59:30 +0000269#undef SRE_SEARCH
270#undef SRE_MATCH
271#undef SRE_MEMBER
272#undef SRE_AT
273#undef SRE_CHAR
274
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000275/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000276
277#define SRE_CHAR Py_UNICODE
278#define SRE_AT sre_uat
279#define SRE_MEMBER sre_umember
280#define SRE_MATCH sre_umatch
281#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000282#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000283
284#endif /* SRE_RECURSIVE */
285
286/* -------------------------------------------------------------------- */
287/* String matching engine */
288
289/* the following section is compiled twice, with different character
290 settings */
291
292LOCAL(int)
293SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
294{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000295 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000296
297 int this, that;
298
299 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000300
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000301 case SRE_AT_BEGINNING:
Guido van Rossum29530882000-04-10 17:06:55 +0000302 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000303
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000304 case SRE_AT_BEGINNING_LINE:
305 return ((void*) ptr == state->beginning ||
306 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000307
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000308 case SRE_AT_END:
Guido van Rossum29530882000-04-10 17:06:55 +0000309 return ((void*) ptr == state->end);
Fredrik Lundh80946112000-06-29 18:03:25 +0000310
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000311 case SRE_AT_END_LINE:
312 return ((void*) ptr == state->end ||
313 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000314
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000315 case SRE_AT_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000316 if (state->beginning == state->end)
317 return 0;
318 that = ((void*) ptr > state->beginning) ?
319 SRE_IS_WORD((int) ptr[-1]) : 0;
320 this = ((void*) ptr < state->end) ?
321 SRE_IS_WORD((int) ptr[0]) : 0;
322 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000323
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000324 case SRE_AT_NON_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000325 if (state->beginning == state->end)
326 return 0;
327 that = ((void*) ptr > state->beginning) ?
328 SRE_IS_WORD((int) ptr[-1]) : 0;
329 this = ((void*) ptr < state->end) ?
330 SRE_IS_WORD((int) ptr[0]) : 0;
331 return this == that;
332 }
333
334 return 0;
335}
336
337LOCAL(int)
338SRE_MEMBER(SRE_CODE* set, SRE_CHAR ch)
339{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000340 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000341
342 int ok = 1;
343
344 for (;;) {
345 switch (*set++) {
346
347 case SRE_OP_NEGATE:
348 ok = !ok;
349 break;
350
351 case SRE_OP_FAILURE:
352 return !ok;
353
354 case SRE_OP_LITERAL:
355 if (ch == (SRE_CHAR) set[0])
356 return ok;
357 set++;
358 break;
359
360 case SRE_OP_RANGE:
361 if ((SRE_CHAR) set[0] <= ch && ch <= (SRE_CHAR) set[1])
362 return ok;
363 set += 2;
364 break;
365
366 case SRE_OP_CATEGORY:
367 if (sre_category(set[0], (int) ch))
368 return ok;
369 set += 1;
370 break;
371
372 default:
Fredrik Lundh80946112000-06-29 18:03:25 +0000373 /* internal error -- there's not much we can do about it
374 here, so let's just pretend it didn't match... */
Guido van Rossumb700df92000-03-31 14:59:30 +0000375 return 0;
376 }
377 }
378}
379
380LOCAL(int)
381SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
382{
383 /* check if string matches the given pattern. returns -1 for
384 error, 0 for failure, and 1 for success */
385
386 SRE_CHAR* end = state->end;
387 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000388 int stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000389 int stackbase;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000390 int lastmark;
Guido van Rossumb700df92000-03-31 14:59:30 +0000391 int i, count;
392
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000393 /* FIXME: this is a hack! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000394 void* mark_copy[SRE_MARK_SIZE];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000395 void* mark = NULL;
396
397 TRACE(("%8d: enter\n", PTR(ptr)));
398
399 stackbase = stack = state->stackbase;
400 lastmark = state->lastmark;
401
402 retry:
Guido van Rossumb700df92000-03-31 14:59:30 +0000403
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000404 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000405
406 switch (*pattern++) {
407
408 case SRE_OP_FAILURE:
409 /* immediate failure */
410 TRACE(("%8d: failure\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000411 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000412
413 case SRE_OP_SUCCESS:
414 /* end of pattern */
415 TRACE(("%8d: success\n", PTR(ptr)));
416 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000417 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000418
419 case SRE_OP_AT:
420 /* match at given position */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000421 /* args: <at> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000422 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
Guido van Rossumb700df92000-03-31 14:59:30 +0000423 if (!SRE_AT(state, ptr, *pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000424 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000425 pattern++;
426 break;
427
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000428 case SRE_OP_CATEGORY:
429 /* match at given category */
430 /* args: <category> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000431 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
432 *ptr, *pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000433 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
434 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000435 TRACE(("%8d: category ok\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000436 pattern++;
437 ptr++;
438 break;
439
Guido van Rossumb700df92000-03-31 14:59:30 +0000440 case SRE_OP_LITERAL:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000441 /* match literal string */
Guido van Rossumb700df92000-03-31 14:59:30 +0000442 /* args: <code> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000443 TRACE(("%8d: literal %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
444 if (ptr >= end || *ptr != (SRE_CHAR) pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000445 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000446 pattern++;
447 ptr++;
448 break;
449
450 case SRE_OP_NOT_LITERAL:
451 /* match anything that is not literal character */
452 /* args: <code> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000453 TRACE(("%8d: literal not %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
454 if (ptr >= end || *ptr == (SRE_CHAR) pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000455 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000456 pattern++;
457 ptr++;
458 break;
459
460 case SRE_OP_ANY:
461 /* match anything */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000462 TRACE(("%8d: anything\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000463 if (ptr >= end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000464 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000465 ptr++;
466 break;
467
468 case SRE_OP_IN:
469 /* match set member (or non_member) */
470 /* args: <skip> <set> */
471 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
472 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000473 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000474 pattern += pattern[0];
475 ptr++;
476 break;
477
478 case SRE_OP_GROUP:
479 /* match backreference */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000480 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000481 i = pattern[0];
482 {
Guido van Rossumb700df92000-03-31 14:59:30 +0000483 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
484 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
485 if (!p || !e || e < p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000486 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000487 while (p < e) {
488 if (ptr >= end || *ptr != *p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000489 goto failure;
490 p++; ptr++;
491 }
492 }
493 pattern++;
494 break;
495
496 case SRE_OP_GROUP_IGNORE:
497 /* match backreference */
498 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
499 i = pattern[0];
500 {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000501 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
502 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000503 if (!p || !e || e < p)
504 goto failure;
505 while (p < e) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000506 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000507 state->lower(*ptr) != state->lower(*p))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000508 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000509 p++; ptr++;
510 }
511 }
512 pattern++;
513 break;
514
515 case SRE_OP_LITERAL_IGNORE:
516 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), (SRE_CHAR) *pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000517 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000518 state->lower(*ptr) != state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000519 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000520 pattern++;
521 ptr++;
522 break;
523
524 case SRE_OP_NOT_LITERAL_IGNORE:
525 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr),
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000526 (SRE_CHAR) *pattern));
527 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000528 state->lower(*ptr) == state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000529 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000530 pattern++;
531 ptr++;
532 break;
533
534 case SRE_OP_IN_IGNORE:
535 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
536 if (ptr >= end
Fredrik Lundhb389df32000-06-29 12:48:37 +0000537 || !SRE_MEMBER(pattern+1, (SRE_CHAR) state->lower(*ptr)))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000538 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000539 pattern += pattern[0];
540 ptr++;
541 break;
542
543 case SRE_OP_MARK:
544 /* set mark */
545 /* args: <mark> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000546 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
547 if (state->lastmark < pattern[0])
548 state->lastmark = pattern[0];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000549 if (!mark) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000550 mark = mark_copy;
551 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000552 }
553 state->mark[pattern[0]] = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000554 pattern++;
555 break;
556
557 case SRE_OP_JUMP:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000558 case SRE_OP_INFO:
Guido van Rossumb700df92000-03-31 14:59:30 +0000559 /* jump forward */
560 /* args: <skip> */
561 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
562 pattern += pattern[0];
563 break;
564
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000565#if 0
Guido van Rossumb700df92000-03-31 14:59:30 +0000566 case SRE_OP_CALL:
567 /* match subpattern, without backtracking */
568 /* args: <skip> <pattern> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000569 TRACE(("%8d: subpattern\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000570 state->ptr = ptr;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000571 i = SRE_MATCH(state, pattern + 1);
572 if (i < 0)
573 return i;
574 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000575 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000576 pattern += pattern[0];
577 ptr = state->ptr;
578 break;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000579#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000580
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000581#if 0
Guido van Rossumb700df92000-03-31 14:59:30 +0000582 case SRE_OP_MAX_REPEAT_ONE:
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000583 /* match repeated sequence (maximizing regexp) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000584
585 /* this operator only works if the repeated item is
586 exactly one character wide, and we're not already
587 collecting backtracking points. for other cases,
588 use the MAX_REPEAT operator instead */
589
Guido van Rossumb700df92000-03-31 14:59:30 +0000590 /* args: <skip> <min> <max> <step> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000591 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
592 pattern[1], pattern[2]));
593
594 count = 0;
595
596 if (pattern[3] == SRE_OP_ANY) {
597 /* repeated wildcard. skip to the end of the target
598 string, and backtrack from there */
599 /* FIXME: must look for line endings */
600 if (ptr + pattern[1] > end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000601 goto failure; /* cannot match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000602 count = pattern[2];
603 if (count > end - ptr)
604 count = end - ptr;
605 ptr += count;
606
607 } else if (pattern[3] == SRE_OP_LITERAL) {
608 /* repeated literal */
609 SRE_CHAR chr = (SRE_CHAR) pattern[4];
610 while (count < (int) pattern[2]) {
611 if (ptr >= end || *ptr != chr)
612 break;
613 ptr++;
614 count++;
615 }
616
617 } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
618 /* repeated literal */
619 SRE_CHAR chr = (SRE_CHAR) pattern[4];
620 while (count < (int) pattern[2]) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000621 if (ptr >= end || (SRE_CHAR) state->lower(*ptr) != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000622 break;
623 ptr++;
624 count++;
625 }
626
627 } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
628 /* repeated non-literal */
629 SRE_CHAR chr = (SRE_CHAR) pattern[4];
630 while (count < (int) pattern[2]) {
631 if (ptr >= end || *ptr == chr)
632 break;
633 ptr++;
634 count++;
635 }
636
637 } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
638 /* repeated non-literal */
639 SRE_CHAR chr = (SRE_CHAR) pattern[4];
640 while (count < (int) pattern[2]) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000641 if (ptr >= end || (SRE_CHAR) state->lower(*ptr) == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000642 break;
643 ptr++;
644 count++;
645 }
646
647 } else if (pattern[3] == SRE_OP_IN) {
648 /* repeated set */
649 while (count < (int) pattern[2]) {
650 if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
651 break;
652 ptr++;
653 count++;
654 }
655
656 } else {
657 /* repeated single character pattern */
658 state->ptr = ptr;
659 while (count < (int) pattern[2]) {
660 i = SRE_MATCH(state, pattern + 3);
661 if (i < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000662 return i;
663 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000664 break;
665 count++;
666 }
667 state->ptr = ptr;
668 ptr += count;
669 }
670
671 /* when we arrive here, count contains the number of
672 matches, and ptr points to the tail of the target
673 string. check if the rest of the pattern matches, and
674 backtrack if not. */
675
Guido van Rossumb700df92000-03-31 14:59:30 +0000676 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
677
678 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000679 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000680
681 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
682 /* tail is empty. we're finished */
683 TRACE(("%8d: tail is empty\n", PTR(ptr)));
684 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000685 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000686
687 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000688 /* tail starts with a literal. skip positions where
689 the rest of the pattern cannot possibly match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000690 SRE_CHAR chr = (SRE_CHAR) pattern[pattern[0]+1];
691 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
692 for (;;) {
693 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
694 while (count >= (int) pattern[1] &&
695 (ptr >= end || *ptr != chr)) {
696 ptr--;
697 count--;
698 }
699 TRACE(("%8d: check tail\n", PTR(ptr)));
700 if (count < (int) pattern[1])
701 break;
702 state->ptr = ptr;
703 i = SRE_MATCH(state, pattern + pattern[0]);
704 if (i > 0) {
705 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000706 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000707 }
708 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
709 ptr--;
710 count--;
711 }
712
713 } else {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000714 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +0000715 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
716 while (count >= (int) pattern[1]) {
717 state->ptr = ptr;
718 i = SRE_MATCH(state, pattern + pattern[0]);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000719 if (i < 0)
720 return i;
721 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000722 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000723 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000724 }
725 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
726 ptr--;
727 count--;
728 }
729 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000731#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000732
733 case SRE_OP_MAX_REPEAT:
734 /* match repeated sequence (maximizing regexp). repeated
735 group should end with a MAX_UNTIL code */
736
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000737 /* args: <skip> <min> <max> <item> */
738
739 TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
Guido van Rossumb700df92000-03-31 14:59:30 +0000740 pattern[1], pattern[2]));
741
742 count = 0;
743 state->ptr = ptr;
744
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000745 /* match minimum number of items */
746 while (count < (int) pattern[1]) {
747 i = SRE_MATCH(state, pattern + 3);
748 if (i < 0)
749 return i;
750 if (!i)
751 goto failure;
752 if (state->ptr == ptr) {
753 /* if the match was successful but empty, set the
754 count to max and terminate the scanning loop */
755 count = (int) pattern[2];
756 break;
757 }
758 count++;
759 ptr = state->ptr;
760 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000761
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000762 TRACE(("%8d: found %d leading items\n", PTR(ptr), count));
Guido van Rossumb700df92000-03-31 14:59:30 +0000763
764 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000765 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000766
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000767 /* match maximum number of items, pushing alternate end
768 points to the stack */
Guido van Rossumb700df92000-03-31 14:59:30 +0000769
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000770 while (pattern[2] == 32767 || count < (int) pattern[2]) {
771 state->stackbase = stack;
772 i = SRE_MATCH(state, pattern + 3);
773 state->stackbase = stackbase; /* rewind */
774 if (i < 0)
775 return i;
776 if (!i)
777 break;
778 if (state->ptr == ptr) {
779 count = (int) pattern[2];
780 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000781 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000782 /* this position was valid; add it to the retry
783 stack */
784 if (stack >= state->stacksize) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000785 i = stack_extend(state, stack + 1,
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000786 stackbase + pattern[2]);
787 if (i < 0)
788 return i; /* out of memory */
789 }
790 TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
791 state->stack[stack].ptr = ptr;
792 state->stack[stack].pattern = pattern + pattern[0];
793 stack++;
794 /* move forward */
795 ptr = state->ptr;
796 count++;
Guido van Rossumb700df92000-03-31 14:59:30 +0000797 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000798
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000799 /* when we get here, count is the number of successful
800 matches, and ptr points to the tail. */
Guido van Rossumb700df92000-03-31 14:59:30 +0000801
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000802 TRACE(("%8d: skip +%d\n", PTR(ptr), pattern[0]));
803
804 pattern += pattern[0];
805 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000806
807 case SRE_OP_MIN_REPEAT:
808 /* match repeated sequence (minimizing regexp) */
809 TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
810 pattern[1], pattern[2]));
811 count = 0;
812 state->ptr = ptr;
813 /* match minimum number of items */
814 while (count < (int) pattern[1]) {
815 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000816 if (i < 0)
817 return i;
818 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000819 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000820 count++;
821 }
822 /* move forward until the tail matches. */
823 while (count <= (int) pattern[2]) {
824 ptr = state->ptr;
825 i = SRE_MATCH(state, pattern + pattern[0]);
826 if (i > 0) {
827 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000828 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000829 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000830 state->ptr = ptr; /* backtrack */
831 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000832 if (i < 0)
833 return i;
834 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000835 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000836 count++;
837 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000838 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000839
Guido van Rossumb700df92000-03-31 14:59:30 +0000840 case SRE_OP_BRANCH:
841 /* match one of several subpatterns */
842 /* format: <branch> <size> <head> ... <null> <tail> */
843 TRACE(("%8d: branch\n", PTR(ptr)));
844 while (*pattern) {
845 if (pattern[1] != SRE_OP_LITERAL ||
846 (ptr < end && *ptr == (SRE_CHAR) pattern[2])) {
847 TRACE(("%8d: branch check\n", PTR(ptr)));
848 state->ptr = ptr;
849 i = SRE_MATCH(state, pattern + 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000850 if (i < 0)
851 return i;
852 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000853 TRACE(("%8d: branch succeeded\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000854 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000855 }
856 }
857 pattern += *pattern;
858 }
859 TRACE(("%8d: branch failed\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000860 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
862 case SRE_OP_REPEAT:
863 /* TEMPLATE: match repeated sequence (no backtracking) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000864 /* args: <skip> <min> <max> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000865 TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
866 count = 0;
867 state->ptr = ptr;
868 while (count < (int) pattern[2]) {
869 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000870 if (i < 0)
871 return i;
872 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000873 break;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000874 if (state->ptr == ptr) {
875 count = (int) pattern[2];
876 break;
877 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000878 count++;
879 }
880 if (count <= (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000881 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000882 TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
883 pattern += pattern[0];
884 ptr = state->ptr;
885 break;
886
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000887 default:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000888 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000889 return SRE_ERROR_ILLEGAL;
890 }
891 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000892
893 failure:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000894 if (stack-- > stackbase) {
895 ptr = state->stack[stack].ptr;
896 pattern = state->stack[stack].pattern;
897 TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
898 goto retry;
899 }
900 TRACE(("%8d: leave (failure)\n", PTR(ptr)));
901 state->stackbase = stackbase;
902 state->lastmark = lastmark;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000903 if (mark)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000904 memcpy(state->mark, mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000905 return 0;
906
907 success:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000908 TRACE(("%8d: leave (success)\n", PTR(ptr)));
909 state->stackbase = stackbase;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000910 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000911}
912
913LOCAL(int)
914SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
915{
916 SRE_CHAR* ptr = state->start;
917 SRE_CHAR* end = state->end;
918 int status = 0;
Fredrik Lundh80946112000-06-29 18:03:25 +0000919 int prefix_len = 0;
920 SRE_CODE* prefix = NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000921
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000922 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000923 /* args: <skip> <min> <max> <prefix> <prefix data...> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000924 end -= pattern[2];
Fredrik Lundh80946112000-06-29 18:03:25 +0000925 prefix_len = pattern[4];
926 prefix = pattern + 5;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000927 pattern += pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000928 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000929
Fredrik Lundh80946112000-06-29 18:03:25 +0000930 /* if (prefix_len > 0) ... */
931
Guido van Rossumb700df92000-03-31 14:59:30 +0000932 if (pattern[0] == SRE_OP_LITERAL) {
933 /* pattern starts with a literal */
934 SRE_CHAR chr = (SRE_CHAR) pattern[1];
935 for (;;) {
936 while (ptr < end && *ptr != chr)
937 ptr++;
938 if (ptr == end)
939 return 0;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000940 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000941 state->start = ptr;
942 state->ptr = ++ptr;
943 status = SRE_MATCH(state, pattern + 2);
944 if (status != 0)
945 break;
946 }
947
Guido van Rossumb700df92000-03-31 14:59:30 +0000948 } else
949 while (ptr <= end) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000950 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000951 state->start = state->ptr = ptr++;
952 status = SRE_MATCH(state, pattern);
953 if (status != 0)
954 break;
955 }
956
957 return status;
958}
959
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000960#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000961
962/* -------------------------------------------------------------------- */
963/* factories and destructors */
964
965/* see sre.h for object declarations */
966
967staticforward PyTypeObject Pattern_Type;
968staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000969staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +0000970
971static PyObject *
972_compile(PyObject* self_, PyObject* args)
973{
974 /* "compile" pattern descriptor to pattern object */
975
976 PatternObject* self;
977
978 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000979 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000980 PyObject* code;
981 int groups = 0;
982 PyObject* groupindex = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000983 if (!PyArg_ParseTuple(args, "OiO!|iO", &pattern, &flags,
984 &PyString_Type, &code,
985 &groups, &groupindex))
Guido van Rossumb700df92000-03-31 14:59:30 +0000986 return NULL;
987
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000988 self = PyObject_NEW(PatternObject, &Pattern_Type);
Guido van Rossumb700df92000-03-31 14:59:30 +0000989 if (self == NULL)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000990
Guido van Rossumb700df92000-03-31 14:59:30 +0000991 return NULL;
992
993 Py_INCREF(pattern);
994 self->pattern = pattern;
995
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000996 self->flags = flags;
997
Guido van Rossumb700df92000-03-31 14:59:30 +0000998 Py_INCREF(code);
999 self->code = code;
1000
1001 self->groups = groups;
1002
1003 Py_XINCREF(groupindex);
1004 self->groupindex = groupindex;
1005
1006 return (PyObject*) self;
1007}
1008
1009static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001010sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001011{
1012 return Py_BuildValue("i", sizeof(SRE_CODE));
1013}
1014
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001015static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001016sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001017{
1018 int character, flags;
1019 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1020 return NULL;
1021 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001022 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001023#if defined(HAVE_UNICODE)
1024 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001025 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001026#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001027 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001028}
1029
Guido van Rossumb700df92000-03-31 14:59:30 +00001030LOCAL(PyObject*)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001031state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001032{
1033 /* prepare state object */
1034
1035 PyBufferProcs *buffer;
1036 int i, count;
1037 void* ptr;
1038
1039 PyObject* string;
1040 int start = 0;
1041 int end = INT_MAX;
1042 if (!PyArg_ParseTuple(args, "O|ii", &string, &start, &end))
1043 return NULL;
1044
1045 /* get pointer to string buffer */
1046 buffer = string->ob_type->tp_as_buffer;
1047 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1048 buffer->bf_getsegcount(string, NULL) != 1) {
1049 PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
1050 return NULL;
1051 }
1052
1053 /* determine buffer size */
1054 count = buffer->bf_getreadbuffer(string, 0, &ptr);
1055 if (count < 0) {
1056 /* sanity check */
1057 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1058 return NULL;
1059 }
1060
1061 /* determine character size */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001062#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001063 state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001064#else
1065 state->charsize = 1;
1066#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001067
1068 count /= state->charsize;
1069
1070 /* adjust boundaries */
1071 if (start < 0)
1072 start = 0;
1073 else if (start > count)
1074 start = count;
1075
1076 if (end < 0)
1077 end = 0;
1078 else if (end > count)
1079 end = count;
1080
1081 state->beginning = ptr;
1082
1083 state->start = (void*) ((char*) ptr + start * state->charsize);
1084 state->end = (void*) ((char*) ptr + end * state->charsize);
1085
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001086 state->lastmark = 0;
1087
Guido van Rossumb700df92000-03-31 14:59:30 +00001088 /* FIXME: dynamic! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001089 for (i = 0; i < SRE_MARK_SIZE; i++)
Guido van Rossumb700df92000-03-31 14:59:30 +00001090 state->mark[i] = NULL;
1091
1092 state->stack = NULL;
1093 state->stackbase = 0;
1094 state->stacksize = 0;
1095
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001096 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001097 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001098#if defined(HAVE_UNICODE)
1099 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001100 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001101#endif
1102 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001103 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001104
Guido van Rossumb700df92000-03-31 14:59:30 +00001105 return string;
1106}
1107
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001108LOCAL(void)
1109state_fini(SRE_STATE* state)
1110{
1111 stack_free(state);
1112}
1113
1114LOCAL(PyObject*)
1115state_getslice(SRE_STATE* state, int index, PyObject* string)
1116{
1117 index = (index - 1) * 2;
1118
1119 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1120 Py_INCREF(Py_None);
1121 return Py_None;
1122 }
1123
1124 return PySequence_GetSlice(
1125 string,
1126 ((char*)state->mark[index] - (char*)state->beginning) /
1127 state->charsize,
1128 ((char*)state->mark[index+1] - (char*)state->beginning) /
1129 state->charsize
1130 );
1131}
1132
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001133static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001134pattern_new_match(PatternObject* pattern, SRE_STATE* state,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001135 PyObject* string, int status)
1136{
1137 /* create match object (from state object) */
1138
1139 MatchObject* match;
1140 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001141 char* base;
1142 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001143
1144 if (status > 0) {
1145
1146 /* create match object (with room for extra group marks) */
1147 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2*pattern->groups);
1148 if (match == NULL)
1149 return NULL;
1150
1151 Py_INCREF(pattern);
1152 match->pattern = pattern;
1153
1154 Py_INCREF(string);
1155 match->string = string;
1156
1157 match->groups = pattern->groups+1;
1158
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001159 base = (char*) state->beginning;
1160 n = state->charsize;
1161
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001162 /* group zero */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001163 match->mark[0] = ((char*) state->start - base) / n;
1164 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001165
1166 /* fill in the rest of the groups */
1167 for (i = j = 0; i < pattern->groups; i++, j+=2)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001168 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1169 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1170 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001171 } else
1172 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1173
1174 return (PyObject*) match;
1175
1176 } else if (status < 0) {
1177
1178 /* internal error */
1179 PyErr_SetString(
1180 PyExc_RuntimeError, "internal error in regular expression engine"
1181 );
1182 return NULL;
1183
1184 }
1185
1186 Py_INCREF(Py_None);
1187 return Py_None;
1188}
1189
1190static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001191pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001192{
1193 /* create search state object */
1194
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001195 ScannerObject* self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001196 PyObject* string;
1197
1198 /* create match object (with room for extra group marks) */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001199 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001200 if (self == NULL)
1201 return NULL;
1202
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001203 string = state_init(&self->state, pattern, args);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001204 if (!string) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001205 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001206 return NULL;
1207 }
1208
1209 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001210 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001211
1212 Py_INCREF(string);
1213 self->string = string;
1214
1215 return (PyObject*) self;
1216}
1217
Guido van Rossumb700df92000-03-31 14:59:30 +00001218static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001219pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001220{
1221 Py_XDECREF(self->code);
1222 Py_XDECREF(self->pattern);
1223 Py_XDECREF(self->groupindex);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001224 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001225}
1226
1227static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001228pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001229{
1230 SRE_STATE state;
1231 PyObject* string;
1232 int status;
1233
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001234 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001235 if (!string)
1236 return NULL;
1237
1238 state.ptr = state.start;
1239
1240 if (state.charsize == 1) {
1241 status = sre_match(&state, PatternObject_GetCode(self));
1242 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001243#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001244 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001245#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001246 }
1247
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001248 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001249
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001250 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001251}
1252
1253static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001254pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001255{
1256 SRE_STATE state;
1257 PyObject* string;
1258 int status;
1259
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001260 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001261 if (!string)
1262 return NULL;
1263
1264 if (state.charsize == 1) {
1265 status = sre_search(&state, PatternObject_GetCode(self));
1266 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001267#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001268 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001269#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001270 }
1271
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001272 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001273
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001274 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001275}
1276
1277static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001278call(char* function, PyObject* args)
1279{
1280 PyObject* name;
1281 PyObject* module;
1282 PyObject* func;
1283 PyObject* result;
1284
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001285 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001286 if (!name)
1287 return NULL;
1288 module = PyImport_Import(name);
1289 Py_DECREF(name);
1290 if (!module)
1291 return NULL;
1292 func = PyObject_GetAttrString(module, function);
1293 Py_DECREF(module);
1294 if (!func)
1295 return NULL;
1296 result = PyObject_CallObject(func, args);
1297 Py_DECREF(func);
1298 Py_DECREF(args);
1299 return result;
1300}
1301
1302static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001303pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001304{
1305 PyObject* template;
1306 PyObject* string;
1307 PyObject* count;
1308 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1309 return NULL;
1310
1311 /* delegate to Python code */
1312 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1313}
1314
1315static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001316pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001317{
1318 PyObject* template;
1319 PyObject* string;
1320 PyObject* count;
1321 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1322 return NULL;
1323
1324 /* delegate to Python code */
1325 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1326}
1327
1328static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001329pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001330{
1331 PyObject* string;
1332 PyObject* maxsplit;
1333 if (!PyArg_ParseTuple(args, "OO", &string, &maxsplit))
1334 return NULL;
1335
1336 /* delegate to Python code */
1337 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1338}
1339
1340static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001341pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001342{
Guido van Rossumb700df92000-03-31 14:59:30 +00001343 SRE_STATE state;
1344 PyObject* string;
1345 PyObject* list;
1346 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001347 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001348
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001349 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001350 if (!string)
1351 return NULL;
1352
1353 list = PyList_New(0);
1354
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001355 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001356
1357 PyObject* item;
1358
1359 state.ptr = state.start;
1360
1361 if (state.charsize == 1) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001362 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +00001363 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001364#if defined(HAVE_UNICODE)
1365 status = sre_usearch(&state, PatternObject_GetCode(self));
1366#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001367 }
1368
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001369 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001370
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001371 /* don't bother to build a match object */
1372 switch (self->groups) {
1373 case 0:
1374 item = PySequence_GetSlice(
1375 string,
1376 ((char*) state.start - (char*) state.beginning) /
1377 state.charsize,
1378 ((char*) state.ptr - (char*) state.beginning) /
1379 state.charsize);
1380 if (!item)
1381 goto error;
1382 break;
1383 case 1:
1384 item = state_getslice(&state, 1, string);
1385 if (!item)
1386 goto error;
1387 break;
1388 default:
1389 item = PyTuple_New(self->groups);
1390 if (!item)
1391 goto error;
1392 for (i = 0; i < self->groups; i++) {
1393 PyObject* o = state_getslice(&state, i+1, string);
1394 if (!o) {
1395 Py_DECREF(item);
1396 goto error;
1397 }
1398 PyTuple_SET_ITEM(item, i, o);
1399 }
1400 break;
1401 }
1402
1403 if (PyList_Append(list, item) < 0) {
1404 Py_DECREF(item);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001405 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001406 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001407
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001408 if (state.ptr == state.start)
1409 state.start = (void*) ((char*) state.ptr + state.charsize);
1410 else
1411 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001412
1413 } else {
1414
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001415 if (status == 0)
1416 break;
1417
Guido van Rossumb700df92000-03-31 14:59:30 +00001418 /* internal error */
1419 PyErr_SetString(
1420 PyExc_RuntimeError,
1421 "internal error in regular expression engine"
1422 );
1423 goto error;
1424
1425 }
1426 }
1427
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001428 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001429 return list;
1430
1431error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001432 Py_DECREF(list);
1433 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001434 return NULL;
1435
1436}
1437
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001438static PyMethodDef pattern_methods[] = {
1439 {"match", (PyCFunction) pattern_match, 1},
1440 {"search", (PyCFunction) pattern_search, 1},
1441 {"sub", (PyCFunction) pattern_sub, 1},
1442 {"subn", (PyCFunction) pattern_subn, 1},
1443 {"split", (PyCFunction) pattern_split, 1},
1444 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001445 /* experimental */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001446 {"scanner", (PyCFunction) pattern_scanner, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001447 {NULL, NULL}
1448};
1449
1450static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001451pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001452{
1453 PyObject* res;
1454
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001455 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001456
1457 if (res)
1458 return res;
1459
1460 PyErr_Clear();
1461
1462 /* attributes */
1463 if (!strcmp(name, "pattern")) {
1464 Py_INCREF(self->pattern);
1465 return self->pattern;
1466 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001467
1468 if (!strcmp(name, "flags"))
1469 return Py_BuildValue("i", self->flags);
1470
1471 if (!strcmp(name, "groupindex") && self->groupindex) {
1472 Py_INCREF(self->groupindex);
1473 return self->groupindex;
1474 }
1475
Guido van Rossumb700df92000-03-31 14:59:30 +00001476 PyErr_SetString(PyExc_AttributeError, name);
1477 return NULL;
1478}
1479
1480statichere PyTypeObject Pattern_Type = {
1481 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001482 0, "SRE_Pattern", sizeof(PatternObject), 0,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001483 (destructor)pattern_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001484 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001485 (getattrfunc)pattern_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001486};
1487
1488/* -------------------------------------------------------------------- */
1489/* match methods */
1490
1491static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001492match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001493{
1494 Py_XDECREF(self->string);
1495 Py_DECREF(self->pattern);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001496 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001497}
1498
1499static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001500match_getslice_by_index(MatchObject* self, int index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001501{
1502 if (index < 0 || index >= self->groups) {
1503 /* raise IndexError if we were given a bad group number */
1504 PyErr_SetString(
1505 PyExc_IndexError,
1506 "no such group"
1507 );
1508 return NULL;
1509 }
1510
1511 if (self->string == Py_None || self->mark[index+index] < 0) {
1512 /* return None if the string or group is undefined */
1513 Py_INCREF(Py_None);
1514 return Py_None;
1515 }
1516
1517 return PySequence_GetSlice(
1518 self->string, self->mark[index+index], self->mark[index+index+1]
1519 );
1520}
1521
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001522static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001523match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001524{
1525 if (!PyInt_Check(index) && self->pattern->groupindex != NULL) {
1526 /* FIXME: resource leak? */
1527 index = PyObject_GetItem(self->pattern->groupindex, index);
1528 if (!index)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001529 return -1;
Guido van Rossumb700df92000-03-31 14:59:30 +00001530 }
1531
1532 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001533 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001534
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001535 return -1;
1536}
1537
1538static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001539match_getslice(MatchObject* self, PyObject* index)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001540{
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001541 return match_getslice_by_index(self, match_getindex(self, index));
Guido van Rossumb700df92000-03-31 14:59:30 +00001542}
1543
1544static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001545match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001546{
1547 PyObject* result;
1548 int i, size;
1549
1550 size = PyTuple_GET_SIZE(args);
1551
1552 switch (size) {
1553 case 0:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001554 result = match_getslice(self, Py_False);
Guido van Rossumb700df92000-03-31 14:59:30 +00001555 break;
1556 case 1:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001557 result = match_getslice(self, PyTuple_GET_ITEM(args, 0));
Guido van Rossumb700df92000-03-31 14:59:30 +00001558 break;
1559 default:
1560 /* fetch multiple items */
1561 result = PyTuple_New(size);
1562 if (!result)
1563 return NULL;
1564 for (i = 0; i < size; i++) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001565 PyObject* item = match_getslice(self, PyTuple_GET_ITEM(args, i));
Guido van Rossumb700df92000-03-31 14:59:30 +00001566 if (!item) {
1567 Py_DECREF(result);
1568 return NULL;
1569 }
1570 PyTuple_SET_ITEM(result, i, item);
1571 }
1572 break;
1573 }
1574 return result;
1575}
1576
1577static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001578match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001579{
1580 PyObject* result;
1581 int index;
1582
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001583 /* FIXME: <fl> handle default value! */
1584
Guido van Rossumb700df92000-03-31 14:59:30 +00001585 result = PyTuple_New(self->groups-1);
1586 if (!result)
1587 return NULL;
1588
1589 for (index = 1; index < self->groups; index++) {
1590 PyObject* item;
1591 /* FIXME: <fl> handle default! */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001592 item = match_getslice_by_index(self, index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001593 if (!item) {
1594 Py_DECREF(result);
1595 return NULL;
1596 }
1597 PyTuple_SET_ITEM(result, index-1, item);
1598 }
1599
1600 return result;
1601}
1602
1603static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001604match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001605{
1606 PyObject* result;
1607 PyObject* keys;
1608 int index;
1609
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001610 /* FIXME: <fl> handle default value! */
1611
Guido van Rossumb700df92000-03-31 14:59:30 +00001612 result = PyDict_New();
1613 if (!result)
1614 return NULL;
1615 if (!self->pattern->groupindex)
1616 return result;
1617
1618 keys = PyMapping_Keys(self->pattern->groupindex);
1619 if (!keys)
1620 return NULL;
1621
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001622 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001623 PyObject* key;
1624 PyObject* item;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001625 key = PyList_GET_ITEM(keys, index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001626 if (!key) {
1627 Py_DECREF(keys);
1628 Py_DECREF(result);
1629 return NULL;
1630 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001631 item = match_getslice(self, key);
Guido van Rossumb700df92000-03-31 14:59:30 +00001632 if (!item) {
1633 Py_DECREF(key);
1634 Py_DECREF(keys);
1635 Py_DECREF(result);
1636 return NULL;
1637 }
1638 /* FIXME: <fl> this can fail, right? */
1639 PyDict_SetItem(result, key, item);
1640 }
1641
1642 Py_DECREF(keys);
1643
1644 return result;
1645}
1646
1647static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001648match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001649{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001650 int index;
1651
1652 PyObject* index_ = Py_False;
1653 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001654 return NULL;
1655
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001656 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001657
Guido van Rossumb700df92000-03-31 14:59:30 +00001658 if (index < 0 || index >= self->groups) {
1659 PyErr_SetString(
1660 PyExc_IndexError,
1661 "no such group"
1662 );
1663 return NULL;
1664 }
1665
1666 if (self->mark[index*2] < 0) {
1667 Py_INCREF(Py_None);
1668 return Py_None;
1669 }
1670
1671 return Py_BuildValue("i", self->mark[index*2]);
1672}
1673
1674static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001675match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001676{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001677 int index;
1678
1679 PyObject* index_ = Py_False;
1680 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001681 return NULL;
1682
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001683 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001684
Guido van Rossumb700df92000-03-31 14:59:30 +00001685 if (index < 0 || index >= self->groups) {
1686 PyErr_SetString(
1687 PyExc_IndexError,
1688 "no such group"
1689 );
1690 return NULL;
1691 }
1692
1693 if (self->mark[index*2] < 0) {
1694 Py_INCREF(Py_None);
1695 return Py_None;
1696 }
1697
1698 return Py_BuildValue("i", self->mark[index*2+1]);
1699}
1700
1701static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001702match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001703{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001704 int index;
1705
1706 PyObject* index_ = Py_False;
1707 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001708 return NULL;
1709
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001710 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001711
Guido van Rossumb700df92000-03-31 14:59:30 +00001712 if (index < 0 || index >= self->groups) {
1713 PyErr_SetString(
1714 PyExc_IndexError,
1715 "no such group"
1716 );
1717 return NULL;
1718 }
1719
1720 if (self->mark[index*2] < 0) {
1721 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001722 Py_INCREF(Py_None);
1723 return Py_BuildValue("OO", Py_None, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001724 }
1725
1726 return Py_BuildValue("ii", self->mark[index*2], self->mark[index*2+1]);
1727}
1728
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001729static PyMethodDef match_methods[] = {
1730 {"group", (PyCFunction) match_group, 1},
1731 {"start", (PyCFunction) match_start, 1},
1732 {"end", (PyCFunction) match_end, 1},
1733 {"span", (PyCFunction) match_span, 1},
1734 {"groups", (PyCFunction) match_groups, 1},
1735 {"groupdict", (PyCFunction) match_groupdict, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001736 {NULL, NULL}
1737};
1738
1739static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001740match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001741{
1742 PyObject* res;
1743
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001744 res = Py_FindMethod(match_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001745 if (res)
1746 return res;
1747
1748 PyErr_Clear();
1749
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001750 /* attributes */
Guido van Rossumb700df92000-03-31 14:59:30 +00001751 if (!strcmp(name, "string")) {
1752 Py_INCREF(self->string);
1753 return self->string;
1754 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001755
Guido van Rossumb700df92000-03-31 14:59:30 +00001756 if (!strcmp(name, "re")) {
1757 Py_INCREF(self->pattern);
1758 return (PyObject*) self->pattern;
1759 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001760
Guido van Rossumb700df92000-03-31 14:59:30 +00001761 if (!strcmp(name, "pos"))
1762 return Py_BuildValue("i", 0); /* FIXME */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001763
Guido van Rossumb700df92000-03-31 14:59:30 +00001764 if (!strcmp(name, "endpos"))
1765 return Py_BuildValue("i", 0); /* FIXME */
1766
1767 PyErr_SetString(PyExc_AttributeError, name);
1768 return NULL;
1769}
1770
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
1773
1774statichere PyTypeObject Match_Type = {
1775 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001776 0, "SRE_Match",
Guido van Rossumb700df92000-03-31 14:59:30 +00001777 sizeof(MatchObject), /* size of basic object */
1778 sizeof(int), /* space for group item */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001779 (destructor)match_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001780 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001781 (getattrfunc)match_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001782};
1783
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001784/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001785/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001786
1787static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001788scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001789{
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001790 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001791 Py_DECREF(self->string);
1792 Py_DECREF(self->pattern);
1793 PyMem_DEL(self);
1794}
1795
1796static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001797scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001798{
1799 SRE_STATE* state = &self->state;
1800 PyObject* match;
1801 int status;
1802
1803 state->ptr = state->start;
1804
1805 if (state->charsize == 1) {
1806 status = sre_match(state, PatternObject_GetCode(self->pattern));
1807 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001808#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001809 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001810#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001811 }
1812
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001813 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001814 state, self->string, status);
1815
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001816 if (status == 0 || state->ptr == state->start)
1817 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001818 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001819 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001820
1821 return match;
1822}
1823
1824
1825static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001826scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001827{
1828 SRE_STATE* state = &self->state;
1829 PyObject* match;
1830 int status;
1831
1832 state->ptr = state->start;
1833
1834 if (state->charsize == 1) {
1835 status = sre_search(state, PatternObject_GetCode(self->pattern));
1836 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001837#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001838 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001839#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001840 }
1841
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001842 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001843 state, self->string, status);
1844
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001845 if (status == 0 || state->ptr == state->start)
1846 state->start = (void*) ((char*) state->ptr + state->charsize);
1847 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001848 state->start = state->ptr;
1849
1850 return match;
1851}
1852
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001853static PyMethodDef scanner_methods[] = {
1854 {"match", (PyCFunction) scanner_match, 0},
1855 {"search", (PyCFunction) scanner_search, 0},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001856 {NULL, NULL}
1857};
1858
1859static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001860scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001861{
1862 PyObject* res;
1863
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001864 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001865 if (res)
1866 return res;
1867
1868 PyErr_Clear();
1869
1870 /* attributes */
1871 if (!strcmp(name, "pattern")) {
1872 Py_INCREF(self->pattern);
1873 return self->pattern;
1874 }
1875
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001876 if (!strcmp(name, "groups"))
1877 return Py_BuildValue("i", ((PatternObject*) self->pattern)->groups);
1878
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001879 PyErr_SetString(PyExc_AttributeError, name);
1880 return NULL;
1881}
1882
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001883statichere PyTypeObject Scanner_Type = {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001884 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001885 0, "SRE_Scanner",
1886 sizeof(ScannerObject), /* size of basic object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001887 0,
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001888 (destructor)scanner_dealloc, /*tp_dealloc*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001889 0, /*tp_print*/
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001890 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001891};
1892
Guido van Rossumb700df92000-03-31 14:59:30 +00001893static PyMethodDef _functions[] = {
1894 {"compile", _compile, 1},
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001895 {"getcodesize", sre_codesize, 1},
Fredrik Lundhb389df32000-06-29 12:48:37 +00001896 {"getlower", sre_getlower, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001897 {NULL, NULL}
1898};
1899
1900void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001901#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00001902__declspec(dllexport)
1903#endif
1904init_sre()
1905{
1906 /* Patch object types */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001907 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001908 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001909
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001910 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00001911}
1912
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001913#endif /* !defined(SRE_RECURSIVE) */