blob: 7b1adbd177ff6ed829698427be9db86eab41321c [file] [log] [blame]
/* -*- Mode: C; tab-width: 4 -*-
 *
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 99-10-24 fl created (based on existing template matcher code)
 * 99-11-13 fl added categories, branching, and more (0.2)
 * 99-11-16 fl some tweaks to compile on non-Windows platforms
 * 99-12-18 fl non-literals, generic maximizing repeat (0.3)
 * 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
 * 00-03-06 fl first alpha, sort of (0.5)
 * 00-03-14 fl removed most compatibility stuff (0.6)
 * 00-05-10 fl towards third alpha (0.8.2)
 * 00-05-13 fl added experimental scanner stuff (0.8.3)
 * 00-05-27 fl final bug hunt (0.8.4)
 * 00-06-21 fl less bugs, more taste (0.8.5)
 * 00-06-25 fl major changes to better deal with nested repeats (0.9)
 * 00-06-28 fl fixed findall (0.9.1)
 * 00-06-29 fl fixed split, added more scanner features (0.9.2)
 * 00-06-30 fl tuning, fast search (0.9.3)
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
30
31#ifndef SRE_RECURSIVE
32
Fredrik Lundh29c08be2000-06-29 23:33:12 +000033char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000034
35#include "Python.h"
36
37#include "sre.h"
38
Guido van Rossumb700df92000-03-31 14:59:30 +000039#if defined(HAVE_LIMITS_H)
40#include <limits.h>
41#else
42#define INT_MAX 2147483647
43#endif
44
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000045#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000046
/* name of this module, minus the leading underscore */
#define MODULE "sre"

/* tracing is off here; define DEBUG at this point instead to enable it */
#undef DEBUG

/* unicode support is switched on by default under 1.6 and later */
#if PY_VERSION_HEX >= 0x01060000
#define HAVE_UNICODE
#endif

/* optional features */
#define USE_FAST_SEARCH

#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif

/* error codes */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_MEMORY -9 /* out of memory */

/* TRACE takes one parenthesized printf argument list, as in
   TRACE(("release stack\n")), and expands to nothing unless DEBUG is
   defined */
#ifdef DEBUG
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* distance, in SRE_CHAR units, from state->beginning to ptr; needs a
   variable called state in scope wherever it is used */
#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Guido van Rossumb700df92000-03-31 14:59:30 +000080
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000081/* -------------------------------------------------------------------- */
82/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000083
/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* flag bits (an OR of the masks above) for character codes 0..127;
   sixteen entries per row, the leading comment gives the first code */
static char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
99
/* lower-case mapping for character codes 0..127: 'A'..'Z' (65..90) map
   to 'a'..'z' (97..122), every other code maps to itself; sixteen
   entries per row, the leading comment gives the first code */
static char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

static unsigned int sre_lower(unsigned int ch)
{
    /* table lookup for codes below 128; anything else is returned
       unchanged */
    if (ch < 128)
        return sre_char_lower[ch];
    return ch;
}
114
/* common body of the SRE_IS_* tests below: codes of 128 and above never
   qualify; for the rest the result is the selected sre_char_info bit,
   i.e. either zero or the mask value itself (not normalized to 1).
   note that ch is evaluated twice */
#define SRE_CHAR_INFO_BITS(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

#define SRE_IS_DIGIT(ch) SRE_CHAR_INFO_BITS((ch), SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch) SRE_CHAR_INFO_BITS((ch), SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_INFO_BITS((ch), SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch) SRE_CHAR_INFO_BITS((ch), SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch) SRE_CHAR_INFO_BITS((ch), SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000125
/* locale-specific character predicates */

static unsigned int sre_lower_locale(unsigned int ch)
{
    /* only codes below 256 are handed to tolower(); anything larger is
       returned unchanged */
    if (ch >= 256)
        return ch;
    return tolower(ch);
}

/* the <ctype.h> classifiers are consulted only for codes below 256;
   larger codes never qualify. the line break test is a plain comparison
   against '\n' */
#define SRE_LOC_IS_DIGIT(ch)\
    ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch)\
    ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch)\
    ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch)\
    (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
137
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)
static unsigned int sre_lower_unicode(unsigned int ch)
{
    /* lower-case ch through the Py_UNICODE_TOLOWER macro; ch is
       narrowed to Py_UNICODE first */
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}
#define SRE_UNI_TO_LOWER(ch) Py_UNICODE_TOLOWER((Py_UNICODE)(ch))
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
/* NOTE(review): this goes through <ctype.h> isalnum() for codes below
   256 and rejects everything above; a real unicode alphanumeric test
   (e.g. Py_UNICODE_ISALNUM, if the targeted Python provides it) would be
   more accurate -- confirm availability before switching */
#define SRE_UNI_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
/* a word character is an alphanumeric or an underscore. this must be
   built on SRE_UNI_IS_ALNUM, not on the 7-bit table driven SRE_IS_ALNUM,
   so that the unicode word and alphanumeric tests agree */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
152
Guido van Rossumb700df92000-03-31 14:59:30 +0000153LOCAL(int)
154sre_category(SRE_CODE category, unsigned int ch)
155{
156 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000157
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000158 case SRE_CATEGORY_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000159 return SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000160 case SRE_CATEGORY_NOT_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000161 return !SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000162 case SRE_CATEGORY_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000163 return SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000164 case SRE_CATEGORY_NOT_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000165 return !SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000166 case SRE_CATEGORY_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000167 return SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000168 case SRE_CATEGORY_NOT_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000169 return !SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000170 case SRE_CATEGORY_LINEBREAK:
171 return SRE_IS_LINEBREAK(ch);
172 case SRE_CATEGORY_NOT_LINEBREAK:
173 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000174
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000175 case SRE_CATEGORY_LOC_WORD:
176 return SRE_LOC_IS_WORD(ch);
177 case SRE_CATEGORY_LOC_NOT_WORD:
178 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000179
180#if defined(HAVE_UNICODE)
181 case SRE_CATEGORY_UNI_DIGIT:
182 return SRE_UNI_IS_DIGIT(ch);
183 case SRE_CATEGORY_UNI_NOT_DIGIT:
184 return !SRE_UNI_IS_DIGIT(ch);
185 case SRE_CATEGORY_UNI_SPACE:
186 return SRE_UNI_IS_SPACE(ch);
187 case SRE_CATEGORY_UNI_NOT_SPACE:
188 return !SRE_UNI_IS_SPACE(ch);
189 case SRE_CATEGORY_UNI_WORD:
190 return SRE_UNI_IS_WORD(ch);
191 case SRE_CATEGORY_UNI_NOT_WORD:
192 return !SRE_UNI_IS_WORD(ch);
193 case SRE_CATEGORY_UNI_LINEBREAK:
194 return SRE_UNI_IS_LINEBREAK(ch);
195 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
196 return !SRE_UNI_IS_LINEBREAK(ch);
197#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000198 }
199 return 0;
200}
201
202/* helpers */
203
204LOCAL(int)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000205stack_free(SRE_STATE* state)
Guido van Rossumb700df92000-03-31 14:59:30 +0000206{
207 if (state->stack) {
208 TRACE(("release stack\n"));
209 free(state->stack);
210 state->stack = NULL;
211 }
212 state->stacksize = 0;
213 return 0;
214}
215
216static int /* shouldn't be LOCAL */
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000217stack_extend(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000218{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000219 SRE_STACK* stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000220 int stacksize;
221
222 /* grow the stack to a suitable size; we need at least lo entries,
223 at most hi entries. if for some reason hi is lower than lo, lo
224 wins */
225
226 stacksize = state->stacksize;
227
228 if (stacksize == 0) {
229 /* create new stack */
230 stacksize = 512;
231 if (stacksize < lo)
232 stacksize = lo;
233 else if (stacksize > hi)
234 stacksize = hi;
235 TRACE(("allocate stack %d\n", stacksize));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000236 stack = malloc(sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000237 } else {
238 /* grow the stack (typically by a factor of two) */
239 while (stacksize < lo)
240 stacksize = 2 * stacksize;
241 /* FIXME: <fl> could trim size if it's larger than lo, and
242 much larger than hi */
243 TRACE(("grow stack to %d\n", stacksize));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000244 stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000245 }
246
247 if (!stack) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000248 stack_free(state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000249 return SRE_ERROR_MEMORY;
250 }
251
252 state->stack = stack;
253 state->stacksize = stacksize;
254
255 return 0;
256}
257
/* generate 8-bit version */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_MEMBER sre_member
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#ifdef HAVE_UNICODE

/* with unicode support the engine is compiled twice. the 8-bit names
   above are consumed by including "_sre.c" (presumably this very file)
   with SRE_RECURSIVE defined, which skips everything inside the
   #ifndef SRE_RECURSIVE guard */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

/* drop the 8-bit names again ... */
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_MEMBER
#undef SRE_AT
#undef SRE_CHAR

/* ... and generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_MEMBER sre_umember
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch

#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000286
287#endif /* SRE_RECURSIVE */
288
289/* -------------------------------------------------------------------- */
290/* String matching engine */
291
292/* the following section is compiled twice, with different character
293 settings */
294
295LOCAL(int)
296SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
297{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000298 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000299
300 int this, that;
301
302 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000303
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000304 case SRE_AT_BEGINNING:
Guido van Rossum29530882000-04-10 17:06:55 +0000305 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000306
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000307 case SRE_AT_BEGINNING_LINE:
308 return ((void*) ptr == state->beginning ||
309 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000310
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000311 case SRE_AT_END:
Guido van Rossum29530882000-04-10 17:06:55 +0000312 return ((void*) ptr == state->end);
Fredrik Lundh80946112000-06-29 18:03:25 +0000313
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000314 case SRE_AT_END_LINE:
315 return ((void*) ptr == state->end ||
316 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000317
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000318 case SRE_AT_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000319 if (state->beginning == state->end)
320 return 0;
321 that = ((void*) ptr > state->beginning) ?
322 SRE_IS_WORD((int) ptr[-1]) : 0;
323 this = ((void*) ptr < state->end) ?
324 SRE_IS_WORD((int) ptr[0]) : 0;
325 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000326
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000327 case SRE_AT_NON_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000328 if (state->beginning == state->end)
329 return 0;
330 that = ((void*) ptr > state->beginning) ?
331 SRE_IS_WORD((int) ptr[-1]) : 0;
332 this = ((void*) ptr < state->end) ?
333 SRE_IS_WORD((int) ptr[0]) : 0;
334 return this == that;
335 }
336
337 return 0;
338}
339
340LOCAL(int)
341SRE_MEMBER(SRE_CODE* set, SRE_CHAR ch)
342{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000343 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000344
345 int ok = 1;
346
347 for (;;) {
348 switch (*set++) {
349
350 case SRE_OP_NEGATE:
351 ok = !ok;
352 break;
353
354 case SRE_OP_FAILURE:
355 return !ok;
356
357 case SRE_OP_LITERAL:
358 if (ch == (SRE_CHAR) set[0])
359 return ok;
360 set++;
361 break;
362
363 case SRE_OP_RANGE:
364 if ((SRE_CHAR) set[0] <= ch && ch <= (SRE_CHAR) set[1])
365 return ok;
366 set += 2;
367 break;
368
369 case SRE_OP_CATEGORY:
370 if (sre_category(set[0], (int) ch))
371 return ok;
372 set += 1;
373 break;
374
375 default:
Fredrik Lundh80946112000-06-29 18:03:25 +0000376 /* internal error -- there's not much we can do about it
377 here, so let's just pretend it didn't match... */
Guido van Rossumb700df92000-03-31 14:59:30 +0000378 return 0;
379 }
380 }
381}
382
383LOCAL(int)
384SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
385{
386 /* check if string matches the given pattern. returns -1 for
387 error, 0 for failure, and 1 for success */
388
389 SRE_CHAR* end = state->end;
390 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000391 int stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000392 int stackbase;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000393 int lastmark;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394 int i, count;
395
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000396 /* FIXME: this is a hack! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000397 void* mark_copy[SRE_MARK_SIZE];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000398 void* mark = NULL;
399
400 TRACE(("%8d: enter\n", PTR(ptr)));
401
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000402 if (pattern[0] == SRE_OP_INFO) {
403 /* optimization info block */
404 /* args: <1=skip> <2=flags> <3=min> ... */
405 if (pattern[3] && (end - ptr) < pattern[3]) {
406 TRACE(("reject (got %d chars, need %d)\n",
407 (end - ptr), pattern[3]));
408 return 0;
409 }
410 pattern += pattern[1] + 1;
411 }
412
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000413 stackbase = stack = state->stackbase;
414 lastmark = state->lastmark;
415
416 retry:
Guido van Rossumb700df92000-03-31 14:59:30 +0000417
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000418 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000419
420 switch (*pattern++) {
421
422 case SRE_OP_FAILURE:
423 /* immediate failure */
424 TRACE(("%8d: failure\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000425 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000426
427 case SRE_OP_SUCCESS:
428 /* end of pattern */
429 TRACE(("%8d: success\n", PTR(ptr)));
430 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000431 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000432
433 case SRE_OP_AT:
434 /* match at given position */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000435 /* args: <at> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000436 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
Guido van Rossumb700df92000-03-31 14:59:30 +0000437 if (!SRE_AT(state, ptr, *pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000438 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000439 pattern++;
440 break;
441
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000442 case SRE_OP_CATEGORY:
443 /* match at given category */
444 /* args: <category> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000445 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
446 *ptr, *pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000447 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
448 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000449 TRACE(("%8d: category ok\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000450 pattern++;
451 ptr++;
452 break;
453
Guido van Rossumb700df92000-03-31 14:59:30 +0000454 case SRE_OP_LITERAL:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000455 /* match literal string */
Guido van Rossumb700df92000-03-31 14:59:30 +0000456 /* args: <code> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000457 TRACE(("%8d: literal %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
458 if (ptr >= end || *ptr != (SRE_CHAR) pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000459 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000460 pattern++;
461 ptr++;
462 break;
463
464 case SRE_OP_NOT_LITERAL:
465 /* match anything that is not literal character */
466 /* args: <code> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000467 TRACE(("%8d: literal not %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
468 if (ptr >= end || *ptr == (SRE_CHAR) pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000469 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000470 pattern++;
471 ptr++;
472 break;
473
474 case SRE_OP_ANY:
475 /* match anything */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000476 TRACE(("%8d: anything\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000477 if (ptr >= end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000478 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000479 ptr++;
480 break;
481
482 case SRE_OP_IN:
483 /* match set member (or non_member) */
484 /* args: <skip> <set> */
485 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
486 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000487 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000488 pattern += pattern[0];
489 ptr++;
490 break;
491
492 case SRE_OP_GROUP:
493 /* match backreference */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000494 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000495 i = pattern[0];
496 {
Guido van Rossumb700df92000-03-31 14:59:30 +0000497 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
498 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
499 if (!p || !e || e < p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000500 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000501 while (p < e) {
502 if (ptr >= end || *ptr != *p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000503 goto failure;
504 p++; ptr++;
505 }
506 }
507 pattern++;
508 break;
509
510 case SRE_OP_GROUP_IGNORE:
511 /* match backreference */
512 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
513 i = pattern[0];
514 {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000515 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
516 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000517 if (!p || !e || e < p)
518 goto failure;
519 while (p < e) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000520 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000521 state->lower(*ptr) != state->lower(*p))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000522 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000523 p++; ptr++;
524 }
525 }
526 pattern++;
527 break;
528
529 case SRE_OP_LITERAL_IGNORE:
530 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), (SRE_CHAR) *pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000531 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000532 state->lower(*ptr) != state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000533 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000534 pattern++;
535 ptr++;
536 break;
537
538 case SRE_OP_NOT_LITERAL_IGNORE:
539 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr),
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000540 (SRE_CHAR) *pattern));
541 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000542 state->lower(*ptr) == state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000543 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000544 pattern++;
545 ptr++;
546 break;
547
548 case SRE_OP_IN_IGNORE:
549 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
550 if (ptr >= end
Fredrik Lundhb389df32000-06-29 12:48:37 +0000551 || !SRE_MEMBER(pattern+1, (SRE_CHAR) state->lower(*ptr)))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000552 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000553 pattern += pattern[0];
554 ptr++;
555 break;
556
557 case SRE_OP_MARK:
558 /* set mark */
559 /* args: <mark> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000560 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
561 if (state->lastmark < pattern[0])
562 state->lastmark = pattern[0];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000563 if (!mark) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000564 mark = mark_copy;
565 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000566 }
567 state->mark[pattern[0]] = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000568 pattern++;
569 break;
570
571 case SRE_OP_JUMP:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000572 case SRE_OP_INFO:
Guido van Rossumb700df92000-03-31 14:59:30 +0000573 /* jump forward */
574 /* args: <skip> */
575 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
576 pattern += pattern[0];
577 break;
578
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000579#if 0
Guido van Rossumb700df92000-03-31 14:59:30 +0000580 case SRE_OP_CALL:
581 /* match subpattern, without backtracking */
582 /* args: <skip> <pattern> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000583 TRACE(("%8d: subpattern\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000584 state->ptr = ptr;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000585 i = SRE_MATCH(state, pattern + 1);
586 if (i < 0)
587 return i;
588 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000589 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000590 pattern += pattern[0];
591 ptr = state->ptr;
592 break;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000593#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000594
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000595#if 0
Guido van Rossumb700df92000-03-31 14:59:30 +0000596 case SRE_OP_MAX_REPEAT_ONE:
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000597 /* match repeated sequence (maximizing regexp) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000598
599 /* this operator only works if the repeated item is
600 exactly one character wide, and we're not already
601 collecting backtracking points. for other cases,
602 use the MAX_REPEAT operator instead */
603
Guido van Rossumb700df92000-03-31 14:59:30 +0000604 /* args: <skip> <min> <max> <step> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000605 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
606 pattern[1], pattern[2]));
607
608 count = 0;
609
610 if (pattern[3] == SRE_OP_ANY) {
611 /* repeated wildcard. skip to the end of the target
612 string, and backtrack from there */
613 /* FIXME: must look for line endings */
614 if (ptr + pattern[1] > end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000615 goto failure; /* cannot match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000616 count = pattern[2];
617 if (count > end - ptr)
618 count = end - ptr;
619 ptr += count;
620
621 } else if (pattern[3] == SRE_OP_LITERAL) {
622 /* repeated literal */
623 SRE_CHAR chr = (SRE_CHAR) pattern[4];
624 while (count < (int) pattern[2]) {
625 if (ptr >= end || *ptr != chr)
626 break;
627 ptr++;
628 count++;
629 }
630
631 } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
632 /* repeated literal */
633 SRE_CHAR chr = (SRE_CHAR) pattern[4];
634 while (count < (int) pattern[2]) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000635 if (ptr >= end || (SRE_CHAR) state->lower(*ptr) != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000636 break;
637 ptr++;
638 count++;
639 }
640
641 } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
642 /* repeated non-literal */
643 SRE_CHAR chr = (SRE_CHAR) pattern[4];
644 while (count < (int) pattern[2]) {
645 if (ptr >= end || *ptr == chr)
646 break;
647 ptr++;
648 count++;
649 }
650
651 } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
652 /* repeated non-literal */
653 SRE_CHAR chr = (SRE_CHAR) pattern[4];
654 while (count < (int) pattern[2]) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000655 if (ptr >= end || (SRE_CHAR) state->lower(*ptr) == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000656 break;
657 ptr++;
658 count++;
659 }
660
661 } else if (pattern[3] == SRE_OP_IN) {
662 /* repeated set */
663 while (count < (int) pattern[2]) {
664 if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
665 break;
666 ptr++;
667 count++;
668 }
669
670 } else {
671 /* repeated single character pattern */
672 state->ptr = ptr;
673 while (count < (int) pattern[2]) {
674 i = SRE_MATCH(state, pattern + 3);
675 if (i < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000676 return i;
677 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000678 break;
679 count++;
680 }
681 state->ptr = ptr;
682 ptr += count;
683 }
684
685 /* when we arrive here, count contains the number of
686 matches, and ptr points to the tail of the target
687 string. check if the rest of the pattern matches, and
688 backtrack if not. */
689
Guido van Rossumb700df92000-03-31 14:59:30 +0000690 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
691
692 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000693 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000694
695 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
696 /* tail is empty. we're finished */
697 TRACE(("%8d: tail is empty\n", PTR(ptr)));
698 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000699 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000700
701 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000702 /* tail starts with a literal. skip positions where
703 the rest of the pattern cannot possibly match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000704 SRE_CHAR chr = (SRE_CHAR) pattern[pattern[0]+1];
705 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
706 for (;;) {
707 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
708 while (count >= (int) pattern[1] &&
709 (ptr >= end || *ptr != chr)) {
710 ptr--;
711 count--;
712 }
713 TRACE(("%8d: check tail\n", PTR(ptr)));
714 if (count < (int) pattern[1])
715 break;
716 state->ptr = ptr;
717 i = SRE_MATCH(state, pattern + pattern[0]);
718 if (i > 0) {
719 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000720 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000721 }
722 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
723 ptr--;
724 count--;
725 }
726
727 } else {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000728 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +0000729 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
730 while (count >= (int) pattern[1]) {
731 state->ptr = ptr;
732 i = SRE_MATCH(state, pattern + pattern[0]);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000733 if (i < 0)
734 return i;
735 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000736 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000737 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000738 }
739 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
740 ptr--;
741 count--;
742 }
743 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000744 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000745#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000746
747 case SRE_OP_MAX_REPEAT:
748 /* match repeated sequence (maximizing regexp). repeated
749 group should end with a MAX_UNTIL code */
750
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000751 /* args: <skip> <min> <max> <item> */
752
753 TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
Guido van Rossumb700df92000-03-31 14:59:30 +0000754 pattern[1], pattern[2]));
755
756 count = 0;
757 state->ptr = ptr;
758
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000759 /* match minimum number of items */
760 while (count < (int) pattern[1]) {
761 i = SRE_MATCH(state, pattern + 3);
762 if (i < 0)
763 return i;
764 if (!i)
765 goto failure;
766 if (state->ptr == ptr) {
767 /* if the match was successful but empty, set the
768 count to max and terminate the scanning loop */
769 count = (int) pattern[2];
770 break;
771 }
772 count++;
773 ptr = state->ptr;
774 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000775
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000776 TRACE(("%8d: found %d leading items\n", PTR(ptr), count));
Guido van Rossumb700df92000-03-31 14:59:30 +0000777
778 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000779 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000780
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000781 /* match maximum number of items, pushing alternate end
782 points to the stack */
Guido van Rossumb700df92000-03-31 14:59:30 +0000783
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000784 while (pattern[2] == 32767 || count < (int) pattern[2]) {
785 state->stackbase = stack;
786 i = SRE_MATCH(state, pattern + 3);
787 state->stackbase = stackbase; /* rewind */
788 if (i < 0)
789 return i;
790 if (!i)
791 break;
792 if (state->ptr == ptr) {
793 count = (int) pattern[2];
794 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000795 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000796 /* this position was valid; add it to the retry
797 stack */
798 if (stack >= state->stacksize) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000799 i = stack_extend(state, stack + 1,
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000800 stackbase + pattern[2]);
801 if (i < 0)
802 return i; /* out of memory */
803 }
804 TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
805 state->stack[stack].ptr = ptr;
806 state->stack[stack].pattern = pattern + pattern[0];
807 stack++;
808 /* move forward */
809 ptr = state->ptr;
810 count++;
Guido van Rossumb700df92000-03-31 14:59:30 +0000811 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000812
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000813 /* when we get here, count is the number of successful
814 matches, and ptr points to the tail. */
Guido van Rossumb700df92000-03-31 14:59:30 +0000815
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000816 TRACE(("%8d: skip +%d\n", PTR(ptr), pattern[0]));
817
818 pattern += pattern[0];
819 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000820
821 case SRE_OP_MIN_REPEAT:
822 /* match repeated sequence (minimizing regexp) */
823 TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
824 pattern[1], pattern[2]));
825 count = 0;
826 state->ptr = ptr;
827 /* match minimum number of items */
828 while (count < (int) pattern[1]) {
829 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000830 if (i < 0)
831 return i;
832 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000833 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000834 count++;
835 }
836 /* move forward until the tail matches. */
837 while (count <= (int) pattern[2]) {
838 ptr = state->ptr;
839 i = SRE_MATCH(state, pattern + pattern[0]);
840 if (i > 0) {
841 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000842 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000843 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000844 state->ptr = ptr; /* backtrack */
845 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000846 if (i < 0)
847 return i;
848 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000849 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000850 count++;
851 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000852 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000853
Guido van Rossumb700df92000-03-31 14:59:30 +0000854 case SRE_OP_BRANCH:
855 /* match one of several subpatterns */
856 /* format: <branch> <size> <head> ... <null> <tail> */
857 TRACE(("%8d: branch\n", PTR(ptr)));
858 while (*pattern) {
859 if (pattern[1] != SRE_OP_LITERAL ||
860 (ptr < end && *ptr == (SRE_CHAR) pattern[2])) {
861 TRACE(("%8d: branch check\n", PTR(ptr)));
862 state->ptr = ptr;
863 i = SRE_MATCH(state, pattern + 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000864 if (i < 0)
865 return i;
866 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000867 TRACE(("%8d: branch succeeded\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000868 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000869 }
870 }
871 pattern += *pattern;
872 }
873 TRACE(("%8d: branch failed\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000874 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000875
876 case SRE_OP_REPEAT:
877 /* TEMPLATE: match repeated sequence (no backtracking) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000878 /* args: <skip> <min> <max> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000879 TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
880 count = 0;
881 state->ptr = ptr;
882 while (count < (int) pattern[2]) {
883 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000884 if (i < 0)
885 return i;
886 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000887 break;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000888 if (state->ptr == ptr) {
889 count = (int) pattern[2];
890 break;
891 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000892 count++;
893 }
894 if (count <= (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000895 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000896 TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
897 pattern += pattern[0];
898 ptr = state->ptr;
899 break;
900
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000901 default:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000902 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000903 return SRE_ERROR_ILLEGAL;
904 }
905 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000906
907 failure:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000908 if (stack-- > stackbase) {
909 ptr = state->stack[stack].ptr;
910 pattern = state->stack[stack].pattern;
911 TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
912 goto retry;
913 }
914 TRACE(("%8d: leave (failure)\n", PTR(ptr)));
915 state->stackbase = stackbase;
916 state->lastmark = lastmark;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000917 if (mark)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000918 memcpy(state->mark, mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000919 return 0;
920
921 success:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000922 TRACE(("%8d: leave (success)\n", PTR(ptr)));
923 state->stackbase = stackbase;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000924 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000925}
926
927LOCAL(int)
928SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
929{
930 SRE_CHAR* ptr = state->start;
931 SRE_CHAR* end = state->end;
932 int status = 0;
Fredrik Lundh80946112000-06-29 18:03:25 +0000933 int prefix_len = 0;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000934 SRE_CODE* prefix;
935 SRE_CODE* overlap;
936 int literal = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000937
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000938 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000939 /* optimization info block */
940 /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
941
942 if (pattern[3] > 0) {
943 /* adjust end point (but make sure we leave at least one
944 character in there) */
945 end -= pattern[3]-1;
946 if (end <= ptr)
947 end = ptr+1;
948 }
949
950 literal = pattern[2];
951
952 prefix = pattern + 6;
953 prefix_len = pattern[5];
954
955 overlap = prefix + prefix_len - 1;
956
957 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000958 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000959
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000960#if defined(USE_FAST_SEARCH)
961 if (prefix_len > 1) {
962 /* pattern starts with a known prefix. use the overlap
963 table to skip forward as fast as we possibly can */
964 int i = 0;
965 end = state->end;
966 while (ptr < end) {
967 for (;;) {
968 if (*ptr != (SRE_CHAR) prefix[i]) {
969 if (!i)
970 break;
971 else
972 i = overlap[i];
973 } else {
974 if (++i == prefix_len) {
975 /* found a potential match */
976 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
977 state->start = ptr - prefix_len + 1;
978 state->ptr = ptr + 1;
979 if (literal)
980 return 1; /* all of it */
981 status = SRE_MATCH(state, pattern + 2*prefix_len);
982 if (status != 0)
983 return status;
984 /* close but no cigar -- try again */
985 i = overlap[i];
986 }
987 break;
988 }
989
990 }
991 ptr++;
992 }
993 return 0;
994 }
995#endif
Fredrik Lundh80946112000-06-29 18:03:25 +0000996
Guido van Rossumb700df92000-03-31 14:59:30 +0000997 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000998 /* pattern starts with a literal character. this is used for
999 short prefixes, and if fast search is disabled*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001000 SRE_CHAR chr = (SRE_CHAR) pattern[1];
1001 for (;;) {
1002 while (ptr < end && *ptr != chr)
1003 ptr++;
1004 if (ptr == end)
1005 return 0;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001006 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001007 state->start = ptr;
1008 state->ptr = ++ptr;
1009 status = SRE_MATCH(state, pattern + 2);
1010 if (status != 0)
1011 break;
1012 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001013 } else
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001014 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +00001015 while (ptr <= end) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001016 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001017 state->start = state->ptr = ptr++;
1018 status = SRE_MATCH(state, pattern);
1019 if (status != 0)
1020 break;
1021 }
1022
1023 return status;
1024}
1025
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001026#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001027
1028/* -------------------------------------------------------------------- */
1029/* factories and destructors */
1030
1031/* see sre.h for object declarations */
1032
1033staticforward PyTypeObject Pattern_Type;
1034staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001035staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001036
1037static PyObject *
1038_compile(PyObject* self_, PyObject* args)
1039{
1040 /* "compile" pattern descriptor to pattern object */
1041
1042 PatternObject* self;
1043
1044 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001045 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001046 PyObject* code;
1047 int groups = 0;
1048 PyObject* groupindex = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001049 if (!PyArg_ParseTuple(args, "OiO!|iO", &pattern, &flags,
1050 &PyString_Type, &code,
1051 &groups, &groupindex))
Guido van Rossumb700df92000-03-31 14:59:30 +00001052 return NULL;
1053
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001054 self = PyObject_NEW(PatternObject, &Pattern_Type);
Guido van Rossumb700df92000-03-31 14:59:30 +00001055 if (self == NULL)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001056
Guido van Rossumb700df92000-03-31 14:59:30 +00001057 return NULL;
1058
1059 Py_INCREF(pattern);
1060 self->pattern = pattern;
1061
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001062 self->flags = flags;
1063
Guido van Rossumb700df92000-03-31 14:59:30 +00001064 Py_INCREF(code);
1065 self->code = code;
1066
1067 self->groups = groups;
1068
1069 Py_XINCREF(groupindex);
1070 self->groupindex = groupindex;
1071
1072 return (PyObject*) self;
1073}
1074
1075static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001076sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001077{
1078 return Py_BuildValue("i", sizeof(SRE_CODE));
1079}
1080
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001081static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001082sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001083{
1084 int character, flags;
1085 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1086 return NULL;
1087 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001088 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001089#if defined(HAVE_UNICODE)
1090 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001091 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001092#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001093 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001094}
1095
Guido van Rossumb700df92000-03-31 14:59:30 +00001096LOCAL(PyObject*)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001097state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001098{
1099 /* prepare state object */
1100
1101 PyBufferProcs *buffer;
1102 int i, count;
1103 void* ptr;
1104
1105 PyObject* string;
1106 int start = 0;
1107 int end = INT_MAX;
1108 if (!PyArg_ParseTuple(args, "O|ii", &string, &start, &end))
1109 return NULL;
1110
1111 /* get pointer to string buffer */
1112 buffer = string->ob_type->tp_as_buffer;
1113 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1114 buffer->bf_getsegcount(string, NULL) != 1) {
1115 PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
1116 return NULL;
1117 }
1118
1119 /* determine buffer size */
1120 count = buffer->bf_getreadbuffer(string, 0, &ptr);
1121 if (count < 0) {
1122 /* sanity check */
1123 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1124 return NULL;
1125 }
1126
1127 /* determine character size */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001128#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001129 state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001130#else
1131 state->charsize = 1;
1132#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001133
1134 count /= state->charsize;
1135
1136 /* adjust boundaries */
1137 if (start < 0)
1138 start = 0;
1139 else if (start > count)
1140 start = count;
1141
1142 if (end < 0)
1143 end = 0;
1144 else if (end > count)
1145 end = count;
1146
1147 state->beginning = ptr;
1148
1149 state->start = (void*) ((char*) ptr + start * state->charsize);
1150 state->end = (void*) ((char*) ptr + end * state->charsize);
1151
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001152 state->lastmark = 0;
1153
Guido van Rossumb700df92000-03-31 14:59:30 +00001154 /* FIXME: dynamic! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001155 for (i = 0; i < SRE_MARK_SIZE; i++)
Guido van Rossumb700df92000-03-31 14:59:30 +00001156 state->mark[i] = NULL;
1157
1158 state->stack = NULL;
1159 state->stackbase = 0;
1160 state->stacksize = 0;
1161
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001162 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001163 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001164#if defined(HAVE_UNICODE)
1165 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001166 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001167#endif
1168 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001169 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001170
Guido van Rossumb700df92000-03-31 14:59:30 +00001171 return string;
1172}
1173
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001174LOCAL(void)
1175state_fini(SRE_STATE* state)
1176{
1177 stack_free(state);
1178}
1179
1180LOCAL(PyObject*)
1181state_getslice(SRE_STATE* state, int index, PyObject* string)
1182{
1183 index = (index - 1) * 2;
1184
1185 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1186 Py_INCREF(Py_None);
1187 return Py_None;
1188 }
1189
1190 return PySequence_GetSlice(
1191 string,
1192 ((char*)state->mark[index] - (char*)state->beginning) /
1193 state->charsize,
1194 ((char*)state->mark[index+1] - (char*)state->beginning) /
1195 state->charsize
1196 );
1197}
1198
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001199static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001200pattern_new_match(PatternObject* pattern, SRE_STATE* state,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001201 PyObject* string, int status)
1202{
1203 /* create match object (from state object) */
1204
1205 MatchObject* match;
1206 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001207 char* base;
1208 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001209
1210 if (status > 0) {
1211
1212 /* create match object (with room for extra group marks) */
1213 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2*pattern->groups);
1214 if (match == NULL)
1215 return NULL;
1216
1217 Py_INCREF(pattern);
1218 match->pattern = pattern;
1219
1220 Py_INCREF(string);
1221 match->string = string;
1222
1223 match->groups = pattern->groups+1;
1224
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001225 base = (char*) state->beginning;
1226 n = state->charsize;
1227
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001228 /* group zero */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001229 match->mark[0] = ((char*) state->start - base) / n;
1230 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001231
1232 /* fill in the rest of the groups */
1233 for (i = j = 0; i < pattern->groups; i++, j+=2)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001234 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1235 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1236 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001237 } else
1238 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1239
1240 return (PyObject*) match;
1241
1242 } else if (status < 0) {
1243
1244 /* internal error */
1245 PyErr_SetString(
1246 PyExc_RuntimeError, "internal error in regular expression engine"
1247 );
1248 return NULL;
1249
1250 }
1251
1252 Py_INCREF(Py_None);
1253 return Py_None;
1254}
1255
1256static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001257pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001258{
1259 /* create search state object */
1260
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001261 ScannerObject* self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001262 PyObject* string;
1263
1264 /* create match object (with room for extra group marks) */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001265 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001266 if (self == NULL)
1267 return NULL;
1268
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001269 string = state_init(&self->state, pattern, args);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001270 if (!string) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001271 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001272 return NULL;
1273 }
1274
1275 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001276 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001277
1278 Py_INCREF(string);
1279 self->string = string;
1280
1281 return (PyObject*) self;
1282}
1283
Guido van Rossumb700df92000-03-31 14:59:30 +00001284static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001285pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001286{
1287 Py_XDECREF(self->code);
1288 Py_XDECREF(self->pattern);
1289 Py_XDECREF(self->groupindex);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001290 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001291}
1292
1293static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001294pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001295{
1296 SRE_STATE state;
1297 PyObject* string;
1298 int status;
1299
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001300 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001301 if (!string)
1302 return NULL;
1303
1304 state.ptr = state.start;
1305
1306 if (state.charsize == 1) {
1307 status = sre_match(&state, PatternObject_GetCode(self));
1308 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001309#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001310 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001311#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001312 }
1313
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001314 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001315
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001316 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001317}
1318
1319static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001320pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001321{
1322 SRE_STATE state;
1323 PyObject* string;
1324 int status;
1325
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001326 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001327 if (!string)
1328 return NULL;
1329
1330 if (state.charsize == 1) {
1331 status = sre_search(&state, PatternObject_GetCode(self));
1332 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001333#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001334 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001335#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001336 }
1337
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001338 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001339
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001340 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001341}
1342
1343static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001344call(char* function, PyObject* args)
1345{
1346 PyObject* name;
1347 PyObject* module;
1348 PyObject* func;
1349 PyObject* result;
1350
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001351 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001352 if (!name)
1353 return NULL;
1354 module = PyImport_Import(name);
1355 Py_DECREF(name);
1356 if (!module)
1357 return NULL;
1358 func = PyObject_GetAttrString(module, function);
1359 Py_DECREF(module);
1360 if (!func)
1361 return NULL;
1362 result = PyObject_CallObject(func, args);
1363 Py_DECREF(func);
1364 Py_DECREF(args);
1365 return result;
1366}
1367
1368static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001369pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001370{
1371 PyObject* template;
1372 PyObject* string;
1373 PyObject* count;
1374 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1375 return NULL;
1376
1377 /* delegate to Python code */
1378 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1379}
1380
1381static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001382pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001383{
1384 PyObject* template;
1385 PyObject* string;
1386 PyObject* count;
1387 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1388 return NULL;
1389
1390 /* delegate to Python code */
1391 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1392}
1393
1394static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001395pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001396{
1397 PyObject* string;
1398 PyObject* maxsplit;
1399 if (!PyArg_ParseTuple(args, "OO", &string, &maxsplit))
1400 return NULL;
1401
1402 /* delegate to Python code */
1403 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1404}
1405
1406static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001407pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001408{
Guido van Rossumb700df92000-03-31 14:59:30 +00001409 SRE_STATE state;
1410 PyObject* string;
1411 PyObject* list;
1412 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001413 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001414
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001415 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001416 if (!string)
1417 return NULL;
1418
1419 list = PyList_New(0);
1420
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001421 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001422
1423 PyObject* item;
1424
1425 state.ptr = state.start;
1426
1427 if (state.charsize == 1) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001428 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +00001429 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001430#if defined(HAVE_UNICODE)
1431 status = sre_usearch(&state, PatternObject_GetCode(self));
1432#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001433 }
1434
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001435 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001436
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001437 /* don't bother to build a match object */
1438 switch (self->groups) {
1439 case 0:
1440 item = PySequence_GetSlice(
1441 string,
1442 ((char*) state.start - (char*) state.beginning) /
1443 state.charsize,
1444 ((char*) state.ptr - (char*) state.beginning) /
1445 state.charsize);
1446 if (!item)
1447 goto error;
1448 break;
1449 case 1:
1450 item = state_getslice(&state, 1, string);
1451 if (!item)
1452 goto error;
1453 break;
1454 default:
1455 item = PyTuple_New(self->groups);
1456 if (!item)
1457 goto error;
1458 for (i = 0; i < self->groups; i++) {
1459 PyObject* o = state_getslice(&state, i+1, string);
1460 if (!o) {
1461 Py_DECREF(item);
1462 goto error;
1463 }
1464 PyTuple_SET_ITEM(item, i, o);
1465 }
1466 break;
1467 }
1468
1469 if (PyList_Append(list, item) < 0) {
1470 Py_DECREF(item);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001471 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001472 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001473
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001474 if (state.ptr == state.start)
1475 state.start = (void*) ((char*) state.ptr + state.charsize);
1476 else
1477 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001478
1479 } else {
1480
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001481 if (status == 0)
1482 break;
1483
Guido van Rossumb700df92000-03-31 14:59:30 +00001484 /* internal error */
1485 PyErr_SetString(
1486 PyExc_RuntimeError,
1487 "internal error in regular expression engine"
1488 );
1489 goto error;
1490
1491 }
1492 }
1493
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001494 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001495 return list;
1496
1497error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001498 Py_DECREF(list);
1499 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001500 return NULL;
1501
1502}
1503
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001504static PyMethodDef pattern_methods[] = {
1505 {"match", (PyCFunction) pattern_match, 1},
1506 {"search", (PyCFunction) pattern_search, 1},
1507 {"sub", (PyCFunction) pattern_sub, 1},
1508 {"subn", (PyCFunction) pattern_subn, 1},
1509 {"split", (PyCFunction) pattern_split, 1},
1510 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001511 /* experimental */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001512 {"scanner", (PyCFunction) pattern_scanner, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001513 {NULL, NULL}
1514};
1515
1516static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001517pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001518{
1519 PyObject* res;
1520
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001521 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001522
1523 if (res)
1524 return res;
1525
1526 PyErr_Clear();
1527
1528 /* attributes */
1529 if (!strcmp(name, "pattern")) {
1530 Py_INCREF(self->pattern);
1531 return self->pattern;
1532 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001533
1534 if (!strcmp(name, "flags"))
1535 return Py_BuildValue("i", self->flags);
1536
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001537 if (!strcmp(name, "groups"))
1538 return Py_BuildValue("i", self->groups);
1539
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001540 if (!strcmp(name, "groupindex") && self->groupindex) {
1541 Py_INCREF(self->groupindex);
1542 return self->groupindex;
1543 }
1544
Guido van Rossumb700df92000-03-31 14:59:30 +00001545 PyErr_SetString(PyExc_AttributeError, name);
1546 return NULL;
1547}
1548
1549statichere PyTypeObject Pattern_Type = {
1550 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001551 0, "SRE_Pattern", sizeof(PatternObject), 0,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001552 (destructor)pattern_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001553 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001554 (getattrfunc)pattern_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001555};
1556
1557/* -------------------------------------------------------------------- */
1558/* match methods */
1559
1560static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001561match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001562{
1563 Py_XDECREF(self->string);
1564 Py_DECREF(self->pattern);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001565 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001566}
1567
1568static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001569match_getslice_by_index(MatchObject* self, int index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001570{
1571 if (index < 0 || index >= self->groups) {
1572 /* raise IndexError if we were given a bad group number */
1573 PyErr_SetString(
1574 PyExc_IndexError,
1575 "no such group"
1576 );
1577 return NULL;
1578 }
1579
1580 if (self->string == Py_None || self->mark[index+index] < 0) {
1581 /* return None if the string or group is undefined */
1582 Py_INCREF(Py_None);
1583 return Py_None;
1584 }
1585
1586 return PySequence_GetSlice(
1587 self->string, self->mark[index+index], self->mark[index+index+1]
1588 );
1589}
1590
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001591static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001592match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001593{
1594 if (!PyInt_Check(index) && self->pattern->groupindex != NULL) {
1595 /* FIXME: resource leak? */
1596 index = PyObject_GetItem(self->pattern->groupindex, index);
1597 if (!index)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001598 return -1;
Guido van Rossumb700df92000-03-31 14:59:30 +00001599 }
1600
1601 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001602 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001603
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001604 return -1;
1605}
1606
1607static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001608match_getslice(MatchObject* self, PyObject* index)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001609{
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001610 return match_getslice_by_index(self, match_getindex(self, index));
Guido van Rossumb700df92000-03-31 14:59:30 +00001611}
1612
1613static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001614match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001615{
1616 PyObject* result;
1617 int i, size;
1618
1619 size = PyTuple_GET_SIZE(args);
1620
1621 switch (size) {
1622 case 0:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001623 result = match_getslice(self, Py_False);
Guido van Rossumb700df92000-03-31 14:59:30 +00001624 break;
1625 case 1:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001626 result = match_getslice(self, PyTuple_GET_ITEM(args, 0));
Guido van Rossumb700df92000-03-31 14:59:30 +00001627 break;
1628 default:
1629 /* fetch multiple items */
1630 result = PyTuple_New(size);
1631 if (!result)
1632 return NULL;
1633 for (i = 0; i < size; i++) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001634 PyObject* item = match_getslice(self, PyTuple_GET_ITEM(args, i));
Guido van Rossumb700df92000-03-31 14:59:30 +00001635 if (!item) {
1636 Py_DECREF(result);
1637 return NULL;
1638 }
1639 PyTuple_SET_ITEM(result, i, item);
1640 }
1641 break;
1642 }
1643 return result;
1644}
1645
1646static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001647match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001648{
1649 PyObject* result;
1650 int index;
1651
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001652 /* FIXME: <fl> handle default value! */
1653
Guido van Rossumb700df92000-03-31 14:59:30 +00001654 result = PyTuple_New(self->groups-1);
1655 if (!result)
1656 return NULL;
1657
1658 for (index = 1; index < self->groups; index++) {
1659 PyObject* item;
1660 /* FIXME: <fl> handle default! */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001661 item = match_getslice_by_index(self, index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001662 if (!item) {
1663 Py_DECREF(result);
1664 return NULL;
1665 }
1666 PyTuple_SET_ITEM(result, index-1, item);
1667 }
1668
1669 return result;
1670}
1671
1672static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001673match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001674{
1675 PyObject* result;
1676 PyObject* keys;
1677 int index;
1678
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001679 /* FIXME: <fl> handle default value! */
1680
Guido van Rossumb700df92000-03-31 14:59:30 +00001681 result = PyDict_New();
1682 if (!result)
1683 return NULL;
1684 if (!self->pattern->groupindex)
1685 return result;
1686
1687 keys = PyMapping_Keys(self->pattern->groupindex);
1688 if (!keys)
1689 return NULL;
1690
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001691 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001692 PyObject* key;
1693 PyObject* item;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001694 key = PyList_GET_ITEM(keys, index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001695 if (!key) {
1696 Py_DECREF(keys);
1697 Py_DECREF(result);
1698 return NULL;
1699 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001700 item = match_getslice(self, key);
Guido van Rossumb700df92000-03-31 14:59:30 +00001701 if (!item) {
1702 Py_DECREF(key);
1703 Py_DECREF(keys);
1704 Py_DECREF(result);
1705 return NULL;
1706 }
1707 /* FIXME: <fl> this can fail, right? */
1708 PyDict_SetItem(result, key, item);
1709 }
1710
1711 Py_DECREF(keys);
1712
1713 return result;
1714}
1715
1716static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001717match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001718{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001719 int index;
1720
1721 PyObject* index_ = Py_False;
1722 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001723 return NULL;
1724
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001725 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001726
Guido van Rossumb700df92000-03-31 14:59:30 +00001727 if (index < 0 || index >= self->groups) {
1728 PyErr_SetString(
1729 PyExc_IndexError,
1730 "no such group"
1731 );
1732 return NULL;
1733 }
1734
1735 if (self->mark[index*2] < 0) {
1736 Py_INCREF(Py_None);
1737 return Py_None;
1738 }
1739
1740 return Py_BuildValue("i", self->mark[index*2]);
1741}
1742
1743static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001744match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001745{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001746 int index;
1747
1748 PyObject* index_ = Py_False;
1749 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001750 return NULL;
1751
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001752 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001753
Guido van Rossumb700df92000-03-31 14:59:30 +00001754 if (index < 0 || index >= self->groups) {
1755 PyErr_SetString(
1756 PyExc_IndexError,
1757 "no such group"
1758 );
1759 return NULL;
1760 }
1761
1762 if (self->mark[index*2] < 0) {
1763 Py_INCREF(Py_None);
1764 return Py_None;
1765 }
1766
1767 return Py_BuildValue("i", self->mark[index*2+1]);
1768}
1769
1770static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001771match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001772{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001773 int index;
1774
1775 PyObject* index_ = Py_False;
1776 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001777 return NULL;
1778
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001779 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001780
Guido van Rossumb700df92000-03-31 14:59:30 +00001781 if (index < 0 || index >= self->groups) {
1782 PyErr_SetString(
1783 PyExc_IndexError,
1784 "no such group"
1785 );
1786 return NULL;
1787 }
1788
1789 if (self->mark[index*2] < 0) {
1790 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001791 Py_INCREF(Py_None);
1792 return Py_BuildValue("OO", Py_None, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001793 }
1794
1795 return Py_BuildValue("ii", self->mark[index*2], self->mark[index*2+1]);
1796}
1797
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001798static PyMethodDef match_methods[] = {
1799 {"group", (PyCFunction) match_group, 1},
1800 {"start", (PyCFunction) match_start, 1},
1801 {"end", (PyCFunction) match_end, 1},
1802 {"span", (PyCFunction) match_span, 1},
1803 {"groups", (PyCFunction) match_groups, 1},
1804 {"groupdict", (PyCFunction) match_groupdict, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001805 {NULL, NULL}
1806};
1807
1808static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001809match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001810{
1811 PyObject* res;
1812
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001813 res = Py_FindMethod(match_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001814 if (res)
1815 return res;
1816
1817 PyErr_Clear();
1818
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001819 /* attributes */
Guido van Rossumb700df92000-03-31 14:59:30 +00001820 if (!strcmp(name, "string")) {
1821 Py_INCREF(self->string);
1822 return self->string;
1823 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001824
Guido van Rossumb700df92000-03-31 14:59:30 +00001825 if (!strcmp(name, "re")) {
1826 Py_INCREF(self->pattern);
1827 return (PyObject*) self->pattern;
1828 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001829
Guido van Rossumb700df92000-03-31 14:59:30 +00001830 if (!strcmp(name, "pos"))
1831 return Py_BuildValue("i", 0); /* FIXME */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001832
Guido van Rossumb700df92000-03-31 14:59:30 +00001833 if (!strcmp(name, "endpos"))
1834 return Py_BuildValue("i", 0); /* FIXME */
1835
1836 PyErr_SetString(PyExc_AttributeError, name);
1837 return NULL;
1838}
1839
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
1842
1843statichere PyTypeObject Match_Type = {
1844 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001845 0, "SRE_Match",
Guido van Rossumb700df92000-03-31 14:59:30 +00001846 sizeof(MatchObject), /* size of basic object */
1847 sizeof(int), /* space for group item */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001848 (destructor)match_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001849 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001850 (getattrfunc)match_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001851};
1852
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001853/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001854/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001855
1856static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001857scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001858{
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001859 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001860 Py_DECREF(self->string);
1861 Py_DECREF(self->pattern);
1862 PyMem_DEL(self);
1863}
1864
1865static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001866scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001867{
1868 SRE_STATE* state = &self->state;
1869 PyObject* match;
1870 int status;
1871
1872 state->ptr = state->start;
1873
1874 if (state->charsize == 1) {
1875 status = sre_match(state, PatternObject_GetCode(self->pattern));
1876 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001877#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001878 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001879#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001880 }
1881
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001882 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001883 state, self->string, status);
1884
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001885 if (status == 0 || state->ptr == state->start)
1886 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001887 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001888 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001889
1890 return match;
1891}
1892
1893
1894static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001895scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001896{
1897 SRE_STATE* state = &self->state;
1898 PyObject* match;
1899 int status;
1900
1901 state->ptr = state->start;
1902
1903 if (state->charsize == 1) {
1904 status = sre_search(state, PatternObject_GetCode(self->pattern));
1905 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001906#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001907 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001908#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001909 }
1910
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001911 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001912 state, self->string, status);
1913
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001914 if (status == 0 || state->ptr == state->start)
1915 state->start = (void*) ((char*) state->ptr + state->charsize);
1916 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001917 state->start = state->ptr;
1918
1919 return match;
1920}
1921
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001922static PyMethodDef scanner_methods[] = {
1923 {"match", (PyCFunction) scanner_match, 0},
1924 {"search", (PyCFunction) scanner_search, 0},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001925 {NULL, NULL}
1926};
1927
1928static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001929scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001930{
1931 PyObject* res;
1932
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001933 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001934 if (res)
1935 return res;
1936
1937 PyErr_Clear();
1938
1939 /* attributes */
1940 if (!strcmp(name, "pattern")) {
1941 Py_INCREF(self->pattern);
1942 return self->pattern;
1943 }
1944
1945 PyErr_SetString(PyExc_AttributeError, name);
1946 return NULL;
1947}
1948
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001949statichere PyTypeObject Scanner_Type = {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001950 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001951 0, "SRE_Scanner",
1952 sizeof(ScannerObject), /* size of basic object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001953 0,
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001954 (destructor)scanner_dealloc, /*tp_dealloc*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001955 0, /*tp_print*/
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001956 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001957};
1958
Guido van Rossumb700df92000-03-31 14:59:30 +00001959static PyMethodDef _functions[] = {
1960 {"compile", _compile, 1},
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001961 {"getcodesize", sre_codesize, 1},
Fredrik Lundhb389df32000-06-29 12:48:37 +00001962 {"getlower", sre_getlower, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001963 {NULL, NULL}
1964};
1965
1966void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001967#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00001968__declspec(dllexport)
1969#endif
1970init_sre()
1971{
1972 /* Patch object types */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001973 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001974 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001975
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001976 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00001977}
1978
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001979#endif /* !defined(SRE_RECURSIVE) */