blob: 22b6c7347c5d2c1ba8540f5b1413add087eafef7 [file] [log] [blame]
Guido van Rossumb700df92000-03-31 14:59:30 +00001/* -*- Mode: C; tab-width: 4 -*-
2 *
3 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00004 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00005 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00006 *
7 * partial history:
Fredrik Lundh436c3d582000-06-29 08:58:44 +00008 * 99-10-24 fl created (based on existing template matcher code)
Guido van Rossumb700df92000-03-31 14:59:30 +00009 * 99-11-13 fl added categories, branching, and more (0.2)
10 * 99-11-16 fl some tweaks to compile on non-Windows platforms
11 * 99-12-18 fl non-literals, generic maximizing repeat (0.3)
Fredrik Lundh436c3d582000-06-29 08:58:44 +000012 * 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
13 * 00-03-06 fl first alpha, sort of (0.5)
14 * 00-03-14 fl removed most compatibility stuff (0.6)
15 * 00-05-10 fl towards third alpha (0.8.2)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +000016 * 00-05-13 fl added experimental scanner stuff (0.8.3)
Fredrik Lundh436c3d582000-06-29 08:58:44 +000017 * 00-05-27 fl final bug hunt (0.8.4)
18 * 00-06-21 fl less bugs, more taste (0.8.5)
19 * 00-06-25 fl major changes to better deal with nested repeats (0.9)
20 * 00-06-28 fl fixed findall (0.9.1)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +000021 * 00-06-29 fl fixed split, added more scanner features (0.9.2)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000022 * 00-06-30 fl tuning, fast search (0.9.3)
Fredrik Lundh43b3b492000-06-30 10:41:31 +000023 * 00-06-30 fl added assert (lookahead) primitives (0.9.4)
Guido van Rossumb700df92000-03-31 14:59:30 +000024 *
25 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
26 *
Guido van Rossumb700df92000-03-31 14:59:30 +000027 * Portions of this engine have been developed in cooperation with
28 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
29 * other compatibility work.
30 */
31
32#ifndef SRE_RECURSIVE
33
Fredrik Lundh43b3b492000-06-30 10:41:31 +000034char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000035
36#include "Python.h"
37
38#include "sre.h"
39
Guido van Rossumb700df92000-03-31 14:59:30 +000040#if defined(HAVE_LIMITS_H)
41#include <limits.h>
42#else
43#define INT_MAX 2147483647
44#endif
45
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000046#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000047
Fredrik Lundh436c3d582000-06-29 08:58:44 +000048/* name of this module, minus the leading underscore */
49#define MODULE "sre"
50
Guido van Rossumb700df92000-03-31 14:59:30 +000051/* defining this one enables tracing */
52#undef DEBUG
53
Fredrik Lundh436c3d582000-06-29 08:58:44 +000054#if PY_VERSION_HEX >= 0x01060000
55/* defining this enables unicode support (default under 1.6) */
56#define HAVE_UNICODE
57#endif
58
Fredrik Lundh29c08be2000-06-29 23:33:12 +000059/* optional features */
60#define USE_FAST_SEARCH
61
Fredrik Lundh80946112000-06-29 18:03:25 +000062#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000063#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
64/* fastest possible local call under MSVC */
65#define LOCAL(type) static __inline type __fastcall
66#else
Fredrik Lundh29c08be2000-06-29 23:33:12 +000067#define LOCAL(type) static inline type
Guido van Rossumb700df92000-03-31 14:59:30 +000068#endif
69
70/* error codes */
71#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
72#define SRE_ERROR_MEMORY -9 /* out of memory */
73
Fredrik Lundh436c3d582000-06-29 08:58:44 +000074#if defined(DEBUG)
Guido van Rossumb700df92000-03-31 14:59:30 +000075#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000076#else
77#define TRACE(v)
78#endif
79
Fredrik Lundh436c3d582000-06-29 08:58:44 +000080#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Guido van Rossumb700df92000-03-31 14:59:30 +000081
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000082/* -------------------------------------------------------------------- */
83/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000084
Fredrik Lundh436c3d582000-06-29 08:58:44 +000085/* default character predicates (run sre_chars.py to regenerate tables) */
86
87#define SRE_DIGIT_MASK 1
88#define SRE_SPACE_MASK 2
89#define SRE_LINEBREAK_MASK 4
90#define SRE_ALNUM_MASK 8
91#define SRE_WORD_MASK 16
92
/* character class flags for the 7-bit ASCII range, indexed by character
   code.  each entry is an OR of the SRE_*_MASK bits:
      2 = space             6 = space + linebreak
     16 = word             24 = alnum + word
     25 = digit + alnum + word */
static char sre_char_info[128] = {
    /* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /* 0x40 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /* 0x60 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};
100
/* lower-case mapping for the 7-bit ASCII range, indexed by character
   code: 'A'..'Z' (65..90) map to 'a'..'z' (97..122), and every other
   code maps to itself */
static char sre_char_lower[128] = {
    /* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
                 8,   9,  10,  11,  12,  13,  14,  15,
    /* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
                24,  25,  26,  27,  28,  29,  30,  31,
    /* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
                40,  41,  42,  43,  44,  45,  46,  47,
    /* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
                56,  57,  58,  59,  60,  61,  62,  63,
    /* 0x40 */  64,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122,  91,  92,  93,  94,  95,
    /* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122, 123, 124, 125, 126, 127
};

static unsigned int sre_lower(unsigned int ch)
{
    /* default (ASCII-only) lower-casing: codes outside the table are
       returned unchanged */
    if (ch >= 128)
        return ch;
    return sre_char_lower[ch];
}
115
/* default (ASCII-only) character predicates.  each one yields the
   selected SRE_*_MASK bit (non-zero) when that flag is set for ch in
   sre_char_info, and 0 otherwise; codes of 128 and above never match.
   note that ch is evaluated twice. */
#define SRE_CHAR_INFO_TEST(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & (mask)) : 0)

#define SRE_IS_DIGIT(ch)     SRE_CHAR_INFO_TEST((ch), SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch)     SRE_CHAR_INFO_TEST((ch), SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_INFO_TEST((ch), SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch)     SRE_CHAR_INFO_TEST((ch), SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch)      SRE_CHAR_INFO_TEST((ch), SRE_WORD_MASK)
Guido van Rossumb700df92000-03-31 14:59:30 +0000126
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000127/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000128
static unsigned int sre_lower_locale(unsigned int ch)
{
    /* locale-aware lower-casing: codes below 256 go through the C
       library's tolower(), anything wider is returned unchanged */
    if (ch >= 256)
        return ch;
    return tolower(ch);
}
/* locale-aware predicates built on the <ctype.h> classifiers.  codes of
   256 and above never match; a linebreak is always just '\n'.  note
   that ch is evaluated more than once. */
#define SRE_LOC_IS_DIGIT(ch) ((ch) >= 256 ? 0 : isdigit((ch)))
#define SRE_LOC_IS_SPACE(ch) ((ch) >= 256 ? 0 : isspace((ch)))
#define SRE_LOC_IS_LINEBREAK(ch) ('\n' == (ch))
#define SRE_LOC_IS_ALNUM(ch) ((ch) >= 256 ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || '_' == (ch))
138
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000139/* unicode-specific character predicates */
140
141#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000142static unsigned int sre_lower_unicode(unsigned int ch)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000143{
144 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
145}
/* unicode predicates.  lower-casing and the digit, space and linebreak
   tests are delegated to the Py_UNICODE_* macros.

   SRE_UNI_IS_ALNUM falls back on the C library's isalnum() and so only
   recognises codes below 256.
   NOTE(review): switch to a real unicode alphanumeric test if the
   unicode API in use provides one.

   SRE_UNI_IS_WORD is built on SRE_UNI_IS_ALNUM (not on the ASCII-only
   SRE_IS_ALNUM), so that the unicode word category stays consistent
   with the unicode alnum predicate.  note that ch is evaluated more
   than once by the last two macros. */
#define SRE_UNI_TO_LOWER(ch) Py_UNICODE_TOLOWER((Py_UNICODE)(ch))
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
152#endif
153
Guido van Rossumb700df92000-03-31 14:59:30 +0000154LOCAL(int)
155sre_category(SRE_CODE category, unsigned int ch)
156{
157 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000158
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000159 case SRE_CATEGORY_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000160 return SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000161 case SRE_CATEGORY_NOT_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000162 return !SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000163 case SRE_CATEGORY_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000164 return SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000165 case SRE_CATEGORY_NOT_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000166 return !SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000167 case SRE_CATEGORY_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000168 return SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000169 case SRE_CATEGORY_NOT_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000170 return !SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000171 case SRE_CATEGORY_LINEBREAK:
172 return SRE_IS_LINEBREAK(ch);
173 case SRE_CATEGORY_NOT_LINEBREAK:
174 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000175
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000176 case SRE_CATEGORY_LOC_WORD:
177 return SRE_LOC_IS_WORD(ch);
178 case SRE_CATEGORY_LOC_NOT_WORD:
179 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000180
181#if defined(HAVE_UNICODE)
182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
198#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000199 }
200 return 0;
201}
202
203/* helpers */
204
205LOCAL(int)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000206stack_free(SRE_STATE* state)
Guido van Rossumb700df92000-03-31 14:59:30 +0000207{
208 if (state->stack) {
209 TRACE(("release stack\n"));
210 free(state->stack);
211 state->stack = NULL;
212 }
213 state->stacksize = 0;
214 return 0;
215}
216
217static int /* shouldn't be LOCAL */
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000218stack_extend(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000219{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000220 SRE_STACK* stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000221 int stacksize;
222
223 /* grow the stack to a suitable size; we need at least lo entries,
224 at most hi entries. if for some reason hi is lower than lo, lo
225 wins */
226
227 stacksize = state->stacksize;
228
229 if (stacksize == 0) {
230 /* create new stack */
231 stacksize = 512;
232 if (stacksize < lo)
233 stacksize = lo;
234 else if (stacksize > hi)
235 stacksize = hi;
236 TRACE(("allocate stack %d\n", stacksize));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000237 stack = malloc(sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000238 } else {
239 /* grow the stack (typically by a factor of two) */
240 while (stacksize < lo)
241 stacksize = 2 * stacksize;
242 /* FIXME: <fl> could trim size if it's larger than lo, and
243 much larger than hi */
244 TRACE(("grow stack to %d\n", stacksize));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000245 stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000246 }
247
248 if (!stack) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000249 stack_free(state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000250 return SRE_ERROR_MEMORY;
251 }
252
253 state->stack = stack;
254 state->stacksize = stacksize;
255
256 return 0;
257}
258
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000259/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000260
261#define SRE_CHAR unsigned char
262#define SRE_AT sre_at
263#define SRE_MEMBER sre_member
264#define SRE_MATCH sre_match
265#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000266
267#if defined(HAVE_UNICODE)
268
Guido van Rossumb700df92000-03-31 14:59:30 +0000269#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000270#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000271#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000272
Guido van Rossumb700df92000-03-31 14:59:30 +0000273#undef SRE_SEARCH
274#undef SRE_MATCH
275#undef SRE_MEMBER
276#undef SRE_AT
277#undef SRE_CHAR
278
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000279/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000280
281#define SRE_CHAR Py_UNICODE
282#define SRE_AT sre_uat
283#define SRE_MEMBER sre_umember
284#define SRE_MATCH sre_umatch
285#define SRE_SEARCH sre_usearch
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000286#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000287
288#endif /* SRE_RECURSIVE */
289
290/* -------------------------------------------------------------------- */
291/* String matching engine */
292
293/* the following section is compiled twice, with different character
294 settings */
295
296LOCAL(int)
297SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
298{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000299 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000300
301 int this, that;
302
303 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000304
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000305 case SRE_AT_BEGINNING:
Guido van Rossum29530882000-04-10 17:06:55 +0000306 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000307
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000308 case SRE_AT_BEGINNING_LINE:
309 return ((void*) ptr == state->beginning ||
310 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000311
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000312 case SRE_AT_END:
Guido van Rossum29530882000-04-10 17:06:55 +0000313 return ((void*) ptr == state->end);
Fredrik Lundh80946112000-06-29 18:03:25 +0000314
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000315 case SRE_AT_END_LINE:
316 return ((void*) ptr == state->end ||
317 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000318
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000319 case SRE_AT_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000320 if (state->beginning == state->end)
321 return 0;
322 that = ((void*) ptr > state->beginning) ?
323 SRE_IS_WORD((int) ptr[-1]) : 0;
324 this = ((void*) ptr < state->end) ?
325 SRE_IS_WORD((int) ptr[0]) : 0;
326 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000327
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000328 case SRE_AT_NON_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000329 if (state->beginning == state->end)
330 return 0;
331 that = ((void*) ptr > state->beginning) ?
332 SRE_IS_WORD((int) ptr[-1]) : 0;
333 this = ((void*) ptr < state->end) ?
334 SRE_IS_WORD((int) ptr[0]) : 0;
335 return this == that;
336 }
337
338 return 0;
339}
340
341LOCAL(int)
342SRE_MEMBER(SRE_CODE* set, SRE_CHAR ch)
343{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000344 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000345
346 int ok = 1;
347
348 for (;;) {
349 switch (*set++) {
350
351 case SRE_OP_NEGATE:
352 ok = !ok;
353 break;
354
355 case SRE_OP_FAILURE:
356 return !ok;
357
358 case SRE_OP_LITERAL:
359 if (ch == (SRE_CHAR) set[0])
360 return ok;
361 set++;
362 break;
363
364 case SRE_OP_RANGE:
365 if ((SRE_CHAR) set[0] <= ch && ch <= (SRE_CHAR) set[1])
366 return ok;
367 set += 2;
368 break;
369
370 case SRE_OP_CATEGORY:
371 if (sre_category(set[0], (int) ch))
372 return ok;
373 set += 1;
374 break;
375
376 default:
Fredrik Lundh80946112000-06-29 18:03:25 +0000377 /* internal error -- there's not much we can do about it
378 here, so let's just pretend it didn't match... */
Guido van Rossumb700df92000-03-31 14:59:30 +0000379 return 0;
380 }
381 }
382}
383
384LOCAL(int)
385SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
386{
387 /* check if string matches the given pattern. returns -1 for
388 error, 0 for failure, and 1 for success */
389
390 SRE_CHAR* end = state->end;
391 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000392 int stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000393 int stackbase;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000394 int lastmark;
Guido van Rossumb700df92000-03-31 14:59:30 +0000395 int i, count;
396
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000397 /* FIXME: this is a hack! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000398 void* mark_copy[SRE_MARK_SIZE];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000399 void* mark = NULL;
400
401 TRACE(("%8d: enter\n", PTR(ptr)));
402
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000403 if (pattern[0] == SRE_OP_INFO) {
404 /* optimization info block */
405 /* args: <1=skip> <2=flags> <3=min> ... */
406 if (pattern[3] && (end - ptr) < pattern[3]) {
407 TRACE(("reject (got %d chars, need %d)\n",
408 (end - ptr), pattern[3]));
409 return 0;
410 }
411 pattern += pattern[1] + 1;
412 }
413
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000414 stackbase = stack = state->stackbase;
415 lastmark = state->lastmark;
416
417 retry:
Guido van Rossumb700df92000-03-31 14:59:30 +0000418
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000419 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000420
421 switch (*pattern++) {
422
423 case SRE_OP_FAILURE:
424 /* immediate failure */
425 TRACE(("%8d: failure\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000426 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000427
428 case SRE_OP_SUCCESS:
429 /* end of pattern */
430 TRACE(("%8d: success\n", PTR(ptr)));
431 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000432 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000433
434 case SRE_OP_AT:
435 /* match at given position */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000436 /* args: <at> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000437 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
Guido van Rossumb700df92000-03-31 14:59:30 +0000438 if (!SRE_AT(state, ptr, *pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000439 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000440 pattern++;
441 break;
442
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000443 case SRE_OP_CATEGORY:
444 /* match at given category */
445 /* args: <category> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000446 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
447 *ptr, *pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000448 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
449 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000450 TRACE(("%8d: category ok\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000451 pattern++;
452 ptr++;
453 break;
454
Guido van Rossumb700df92000-03-31 14:59:30 +0000455 case SRE_OP_LITERAL:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000456 /* match literal string */
Guido van Rossumb700df92000-03-31 14:59:30 +0000457 /* args: <code> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000458 TRACE(("%8d: literal %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
459 if (ptr >= end || *ptr != (SRE_CHAR) pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000460 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000461 pattern++;
462 ptr++;
463 break;
464
465 case SRE_OP_NOT_LITERAL:
466 /* match anything that is not literal character */
467 /* args: <code> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000468 TRACE(("%8d: literal not %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
469 if (ptr >= end || *ptr == (SRE_CHAR) pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000470 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000471 pattern++;
472 ptr++;
473 break;
474
475 case SRE_OP_ANY:
476 /* match anything */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000477 TRACE(("%8d: anything\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000478 if (ptr >= end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000479 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000480 ptr++;
481 break;
482
483 case SRE_OP_IN:
484 /* match set member (or non_member) */
485 /* args: <skip> <set> */
486 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
487 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000488 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000489 pattern += pattern[0];
490 ptr++;
491 break;
492
493 case SRE_OP_GROUP:
494 /* match backreference */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000495 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000496 i = pattern[0];
497 {
Guido van Rossumb700df92000-03-31 14:59:30 +0000498 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
499 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
500 if (!p || !e || e < p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000501 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000502 while (p < e) {
503 if (ptr >= end || *ptr != *p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000504 goto failure;
505 p++; ptr++;
506 }
507 }
508 pattern++;
509 break;
510
511 case SRE_OP_GROUP_IGNORE:
512 /* match backreference */
513 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
514 i = pattern[0];
515 {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000516 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
517 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000518 if (!p || !e || e < p)
519 goto failure;
520 while (p < e) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000521 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000522 state->lower(*ptr) != state->lower(*p))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000523 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000524 p++; ptr++;
525 }
526 }
527 pattern++;
528 break;
529
530 case SRE_OP_LITERAL_IGNORE:
531 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), (SRE_CHAR) *pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000532 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000533 state->lower(*ptr) != state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000534 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000535 pattern++;
536 ptr++;
537 break;
538
539 case SRE_OP_NOT_LITERAL_IGNORE:
540 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr),
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000541 (SRE_CHAR) *pattern));
542 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000543 state->lower(*ptr) == state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000544 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000545 pattern++;
546 ptr++;
547 break;
548
549 case SRE_OP_IN_IGNORE:
550 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
551 if (ptr >= end
Fredrik Lundhb389df32000-06-29 12:48:37 +0000552 || !SRE_MEMBER(pattern+1, (SRE_CHAR) state->lower(*ptr)))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000553 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000554 pattern += pattern[0];
555 ptr++;
556 break;
557
558 case SRE_OP_MARK:
559 /* set mark */
560 /* args: <mark> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000561 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
562 if (state->lastmark < pattern[0])
563 state->lastmark = pattern[0];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000564 if (!mark) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000565 mark = mark_copy;
566 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000567 }
568 state->mark[pattern[0]] = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000569 pattern++;
570 break;
571
572 case SRE_OP_JUMP:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000573 case SRE_OP_INFO:
Guido van Rossumb700df92000-03-31 14:59:30 +0000574 /* jump forward */
575 /* args: <skip> */
576 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
577 pattern += pattern[0];
578 break;
579
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000580 case SRE_OP_ASSERT:
581 /* assert subpattern */
Guido van Rossumb700df92000-03-31 14:59:30 +0000582 /* args: <skip> <pattern> */
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000583 TRACE(("%8d: assert subpattern\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000584 state->ptr = ptr;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000585 i = SRE_MATCH(state, pattern + 1);
586 if (i < 0)
587 return i;
588 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000589 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000590 pattern += pattern[0];
Guido van Rossumb700df92000-03-31 14:59:30 +0000591 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000592
593 case SRE_OP_ASSERT_NOT:
594 /* assert not subpattern */
595 /* args: <skip> <pattern> */
596 TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
597 state->ptr = ptr;
598 i = SRE_MATCH(state, pattern + 1);
599 if (i < 0)
600 return i;
601 if (i)
602 goto failure;
603 pattern += pattern[0];
604 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000606#if 0
Guido van Rossumb700df92000-03-31 14:59:30 +0000607 case SRE_OP_MAX_REPEAT_ONE:
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000608 /* match repeated sequence (maximizing regexp) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000609
610 /* this operator only works if the repeated item is
611 exactly one character wide, and we're not already
612 collecting backtracking points. for other cases,
613 use the MAX_REPEAT operator instead */
614
Guido van Rossumb700df92000-03-31 14:59:30 +0000615 /* args: <skip> <min> <max> <step> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000616 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
617 pattern[1], pattern[2]));
618
619 count = 0;
620
621 if (pattern[3] == SRE_OP_ANY) {
622 /* repeated wildcard. skip to the end of the target
623 string, and backtrack from there */
624 /* FIXME: must look for line endings */
625 if (ptr + pattern[1] > end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000626 goto failure; /* cannot match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000627 count = pattern[2];
628 if (count > end - ptr)
629 count = end - ptr;
630 ptr += count;
631
632 } else if (pattern[3] == SRE_OP_LITERAL) {
633 /* repeated literal */
634 SRE_CHAR chr = (SRE_CHAR) pattern[4];
635 while (count < (int) pattern[2]) {
636 if (ptr >= end || *ptr != chr)
637 break;
638 ptr++;
639 count++;
640 }
641
642 } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
643 /* repeated literal */
644 SRE_CHAR chr = (SRE_CHAR) pattern[4];
645 while (count < (int) pattern[2]) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000646 if (ptr >= end || (SRE_CHAR) state->lower(*ptr) != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000647 break;
648 ptr++;
649 count++;
650 }
651
652 } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
653 /* repeated non-literal */
654 SRE_CHAR chr = (SRE_CHAR) pattern[4];
655 while (count < (int) pattern[2]) {
656 if (ptr >= end || *ptr == chr)
657 break;
658 ptr++;
659 count++;
660 }
661
662 } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
663 /* repeated non-literal */
664 SRE_CHAR chr = (SRE_CHAR) pattern[4];
665 while (count < (int) pattern[2]) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000666 if (ptr >= end || (SRE_CHAR) state->lower(*ptr) == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000667 break;
668 ptr++;
669 count++;
670 }
671
672 } else if (pattern[3] == SRE_OP_IN) {
673 /* repeated set */
674 while (count < (int) pattern[2]) {
675 if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
676 break;
677 ptr++;
678 count++;
679 }
680
681 } else {
682 /* repeated single character pattern */
683 state->ptr = ptr;
684 while (count < (int) pattern[2]) {
685 i = SRE_MATCH(state, pattern + 3);
686 if (i < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000687 return i;
688 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000689 break;
690 count++;
691 }
692 state->ptr = ptr;
693 ptr += count;
694 }
695
696 /* when we arrive here, count contains the number of
697 matches, and ptr points to the tail of the target
698 string. check if the rest of the pattern matches, and
699 backtrack if not. */
700
Guido van Rossumb700df92000-03-31 14:59:30 +0000701 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
702
703 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000704 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000705
706 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
707 /* tail is empty. we're finished */
708 TRACE(("%8d: tail is empty\n", PTR(ptr)));
709 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000710 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000711
712 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000713 /* tail starts with a literal. skip positions where
714 the rest of the pattern cannot possibly match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000715 SRE_CHAR chr = (SRE_CHAR) pattern[pattern[0]+1];
716 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
717 for (;;) {
718 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
719 while (count >= (int) pattern[1] &&
720 (ptr >= end || *ptr != chr)) {
721 ptr--;
722 count--;
723 }
724 TRACE(("%8d: check tail\n", PTR(ptr)));
725 if (count < (int) pattern[1])
726 break;
727 state->ptr = ptr;
728 i = SRE_MATCH(state, pattern + pattern[0]);
729 if (i > 0) {
730 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000731 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000732 }
733 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
734 ptr--;
735 count--;
736 }
737
738 } else {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000739 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +0000740 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
741 while (count >= (int) pattern[1]) {
742 state->ptr = ptr;
743 i = SRE_MATCH(state, pattern + pattern[0]);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000744 if (i < 0)
745 return i;
746 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000747 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000748 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000749 }
750 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
751 ptr--;
752 count--;
753 }
754 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000755 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000756#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000757
758 case SRE_OP_MAX_REPEAT:
759 /* match repeated sequence (maximizing regexp). repeated
760 group should end with a MAX_UNTIL code */
761
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000762 /* args: <skip> <min> <max> <item> */
763
764 TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
Guido van Rossumb700df92000-03-31 14:59:30 +0000765 pattern[1], pattern[2]));
766
767 count = 0;
768 state->ptr = ptr;
769
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000770 /* match minimum number of items */
771 while (count < (int) pattern[1]) {
772 i = SRE_MATCH(state, pattern + 3);
773 if (i < 0)
774 return i;
775 if (!i)
776 goto failure;
777 if (state->ptr == ptr) {
778 /* if the match was successful but empty, set the
779 count to max and terminate the scanning loop */
780 count = (int) pattern[2];
781 break;
782 }
783 count++;
784 ptr = state->ptr;
785 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000786
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000787 TRACE(("%8d: found %d leading items\n", PTR(ptr), count));
Guido van Rossumb700df92000-03-31 14:59:30 +0000788
789 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000790 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000791
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000792 /* match maximum number of items, pushing alternate end
793 points to the stack */
Guido van Rossumb700df92000-03-31 14:59:30 +0000794
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000795 while (pattern[2] == 32767 || count < (int) pattern[2]) {
796 state->stackbase = stack;
797 i = SRE_MATCH(state, pattern + 3);
798 state->stackbase = stackbase; /* rewind */
799 if (i < 0)
800 return i;
801 if (!i)
802 break;
803 if (state->ptr == ptr) {
804 count = (int) pattern[2];
805 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000806 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000807 /* this position was valid; add it to the retry
808 stack */
809 if (stack >= state->stacksize) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000810 i = stack_extend(state, stack + 1,
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000811 stackbase + pattern[2]);
812 if (i < 0)
813 return i; /* out of memory */
814 }
815 TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
816 state->stack[stack].ptr = ptr;
817 state->stack[stack].pattern = pattern + pattern[0];
818 stack++;
819 /* move forward */
820 ptr = state->ptr;
821 count++;
Guido van Rossumb700df92000-03-31 14:59:30 +0000822 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000823
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000824 /* when we get here, count is the number of successful
825 matches, and ptr points to the tail. */
Guido van Rossumb700df92000-03-31 14:59:30 +0000826
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000827 TRACE(("%8d: skip +%d\n", PTR(ptr), pattern[0]));
828
829 pattern += pattern[0];
830 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000831
832 case SRE_OP_MIN_REPEAT:
833 /* match repeated sequence (minimizing regexp) */
834 TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
835 pattern[1], pattern[2]));
836 count = 0;
837 state->ptr = ptr;
838 /* match minimum number of items */
839 while (count < (int) pattern[1]) {
840 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000841 if (i < 0)
842 return i;
843 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000844 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000845 count++;
846 }
847 /* move forward until the tail matches. */
848 while (count <= (int) pattern[2]) {
849 ptr = state->ptr;
850 i = SRE_MATCH(state, pattern + pattern[0]);
851 if (i > 0) {
852 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000853 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000854 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000855 state->ptr = ptr; /* backtrack */
856 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000857 if (i < 0)
858 return i;
859 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000860 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000861 count++;
862 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000863 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000864
Guido van Rossumb700df92000-03-31 14:59:30 +0000865 case SRE_OP_BRANCH:
866 /* match one of several subpatterns */
867 /* format: <branch> <size> <head> ... <null> <tail> */
868 TRACE(("%8d: branch\n", PTR(ptr)));
869 while (*pattern) {
870 if (pattern[1] != SRE_OP_LITERAL ||
871 (ptr < end && *ptr == (SRE_CHAR) pattern[2])) {
872 TRACE(("%8d: branch check\n", PTR(ptr)));
873 state->ptr = ptr;
874 i = SRE_MATCH(state, pattern + 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000875 if (i < 0)
876 return i;
877 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000878 TRACE(("%8d: branch succeeded\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000879 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000880 }
881 }
882 pattern += *pattern;
883 }
884 TRACE(("%8d: branch failed\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000885 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000886
887 case SRE_OP_REPEAT:
888 /* TEMPLATE: match repeated sequence (no backtracking) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000889 /* args: <skip> <min> <max> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000890 TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
891 count = 0;
892 state->ptr = ptr;
893 while (count < (int) pattern[2]) {
894 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000895 if (i < 0)
896 return i;
897 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000898 break;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000899 if (state->ptr == ptr) {
900 count = (int) pattern[2];
901 break;
902 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000903 count++;
904 }
905 if (count <= (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000906 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000907 TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
908 pattern += pattern[0];
909 ptr = state->ptr;
910 break;
911
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000912 default:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000913 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000914 return SRE_ERROR_ILLEGAL;
915 }
916 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000917
918 failure:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000919 if (stack-- > stackbase) {
920 ptr = state->stack[stack].ptr;
921 pattern = state->stack[stack].pattern;
922 TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
923 goto retry;
924 }
925 TRACE(("%8d: leave (failure)\n", PTR(ptr)));
926 state->stackbase = stackbase;
927 state->lastmark = lastmark;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000928 if (mark)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000929 memcpy(state->mark, mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000930 return 0;
931
932 success:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000933 TRACE(("%8d: leave (success)\n", PTR(ptr)));
934 state->stackbase = stackbase;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000935 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000936}
937
938LOCAL(int)
939SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
940{
941 SRE_CHAR* ptr = state->start;
942 SRE_CHAR* end = state->end;
943 int status = 0;
Fredrik Lundh80946112000-06-29 18:03:25 +0000944 int prefix_len = 0;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000945 SRE_CODE* prefix;
946 SRE_CODE* overlap;
947 int literal = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000948
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000949 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000950 /* optimization info block */
951 /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
952
953 if (pattern[3] > 0) {
954 /* adjust end point (but make sure we leave at least one
955 character in there) */
956 end -= pattern[3]-1;
957 if (end <= ptr)
958 end = ptr+1;
959 }
960
961 literal = pattern[2];
962
963 prefix = pattern + 6;
964 prefix_len = pattern[5];
965
966 overlap = prefix + prefix_len - 1;
967
968 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000969 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000970
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000971#if defined(USE_FAST_SEARCH)
972 if (prefix_len > 1) {
973 /* pattern starts with a known prefix. use the overlap
974 table to skip forward as fast as we possibly can */
975 int i = 0;
976 end = state->end;
977 while (ptr < end) {
978 for (;;) {
979 if (*ptr != (SRE_CHAR) prefix[i]) {
980 if (!i)
981 break;
982 else
983 i = overlap[i];
984 } else {
985 if (++i == prefix_len) {
986 /* found a potential match */
987 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
988 state->start = ptr - prefix_len + 1;
989 state->ptr = ptr + 1;
990 if (literal)
991 return 1; /* all of it */
992 status = SRE_MATCH(state, pattern + 2*prefix_len);
993 if (status != 0)
994 return status;
995 /* close but no cigar -- try again */
996 i = overlap[i];
997 }
998 break;
999 }
1000
1001 }
1002 ptr++;
1003 }
1004 return 0;
1005 }
1006#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001007
Guido van Rossumb700df92000-03-31 14:59:30 +00001008 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001009 /* pattern starts with a literal character. this is used for
1010 short prefixes, and if fast search is disabled*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001011 SRE_CHAR chr = (SRE_CHAR) pattern[1];
1012 for (;;) {
1013 while (ptr < end && *ptr != chr)
1014 ptr++;
1015 if (ptr == end)
1016 return 0;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001017 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001018 state->start = ptr;
1019 state->ptr = ++ptr;
1020 status = SRE_MATCH(state, pattern + 2);
1021 if (status != 0)
1022 break;
1023 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001024 } else
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001025 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +00001026 while (ptr <= end) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001027 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001028 state->start = state->ptr = ptr++;
1029 status = SRE_MATCH(state, pattern);
1030 if (status != 0)
1031 break;
1032 }
1033
1034 return status;
1035}
1036
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001037#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001038
1039/* -------------------------------------------------------------------- */
1040/* factories and destructors */
1041
1042/* see sre.h for object declarations */
1043
1044staticforward PyTypeObject Pattern_Type;
1045staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001046staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001047
1048static PyObject *
1049_compile(PyObject* self_, PyObject* args)
1050{
1051 /* "compile" pattern descriptor to pattern object */
1052
1053 PatternObject* self;
1054
1055 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001056 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001057 PyObject* code;
1058 int groups = 0;
1059 PyObject* groupindex = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001060 if (!PyArg_ParseTuple(args, "OiO!|iO", &pattern, &flags,
1061 &PyString_Type, &code,
1062 &groups, &groupindex))
Guido van Rossumb700df92000-03-31 14:59:30 +00001063 return NULL;
1064
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001065 self = PyObject_NEW(PatternObject, &Pattern_Type);
Guido van Rossumb700df92000-03-31 14:59:30 +00001066 if (self == NULL)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001067
Guido van Rossumb700df92000-03-31 14:59:30 +00001068 return NULL;
1069
1070 Py_INCREF(pattern);
1071 self->pattern = pattern;
1072
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001073 self->flags = flags;
1074
Guido van Rossumb700df92000-03-31 14:59:30 +00001075 Py_INCREF(code);
1076 self->code = code;
1077
1078 self->groups = groups;
1079
1080 Py_XINCREF(groupindex);
1081 self->groupindex = groupindex;
1082
1083 return (PyObject*) self;
1084}
1085
1086static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001087sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001088{
1089 return Py_BuildValue("i", sizeof(SRE_CODE));
1090}
1091
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001092static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001093sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001094{
1095 int character, flags;
1096 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1097 return NULL;
1098 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001099 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001100#if defined(HAVE_UNICODE)
1101 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001102 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001103#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001104 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001105}
1106
Guido van Rossumb700df92000-03-31 14:59:30 +00001107LOCAL(PyObject*)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001108state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001109{
1110 /* prepare state object */
1111
1112 PyBufferProcs *buffer;
1113 int i, count;
1114 void* ptr;
1115
1116 PyObject* string;
1117 int start = 0;
1118 int end = INT_MAX;
1119 if (!PyArg_ParseTuple(args, "O|ii", &string, &start, &end))
1120 return NULL;
1121
1122 /* get pointer to string buffer */
1123 buffer = string->ob_type->tp_as_buffer;
1124 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1125 buffer->bf_getsegcount(string, NULL) != 1) {
1126 PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
1127 return NULL;
1128 }
1129
1130 /* determine buffer size */
1131 count = buffer->bf_getreadbuffer(string, 0, &ptr);
1132 if (count < 0) {
1133 /* sanity check */
1134 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1135 return NULL;
1136 }
1137
1138 /* determine character size */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001139#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001140 state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001141#else
1142 state->charsize = 1;
1143#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001144
1145 count /= state->charsize;
1146
1147 /* adjust boundaries */
1148 if (start < 0)
1149 start = 0;
1150 else if (start > count)
1151 start = count;
1152
1153 if (end < 0)
1154 end = 0;
1155 else if (end > count)
1156 end = count;
1157
1158 state->beginning = ptr;
1159
1160 state->start = (void*) ((char*) ptr + start * state->charsize);
1161 state->end = (void*) ((char*) ptr + end * state->charsize);
1162
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001163 state->lastmark = 0;
1164
Guido van Rossumb700df92000-03-31 14:59:30 +00001165 /* FIXME: dynamic! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001166 for (i = 0; i < SRE_MARK_SIZE; i++)
Guido van Rossumb700df92000-03-31 14:59:30 +00001167 state->mark[i] = NULL;
1168
1169 state->stack = NULL;
1170 state->stackbase = 0;
1171 state->stacksize = 0;
1172
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001173 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001174 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001175#if defined(HAVE_UNICODE)
1176 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001177 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001178#endif
1179 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001180 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001181
Guido van Rossumb700df92000-03-31 14:59:30 +00001182 return string;
1183}
1184
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001185LOCAL(void)
1186state_fini(SRE_STATE* state)
1187{
1188 stack_free(state);
1189}
1190
1191LOCAL(PyObject*)
1192state_getslice(SRE_STATE* state, int index, PyObject* string)
1193{
1194 index = (index - 1) * 2;
1195
1196 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1197 Py_INCREF(Py_None);
1198 return Py_None;
1199 }
1200
1201 return PySequence_GetSlice(
1202 string,
1203 ((char*)state->mark[index] - (char*)state->beginning) /
1204 state->charsize,
1205 ((char*)state->mark[index+1] - (char*)state->beginning) /
1206 state->charsize
1207 );
1208}
1209
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001210static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001211pattern_new_match(PatternObject* pattern, SRE_STATE* state,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001212 PyObject* string, int status)
1213{
1214 /* create match object (from state object) */
1215
1216 MatchObject* match;
1217 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001218 char* base;
1219 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001220
1221 if (status > 0) {
1222
1223 /* create match object (with room for extra group marks) */
1224 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2*pattern->groups);
1225 if (match == NULL)
1226 return NULL;
1227
1228 Py_INCREF(pattern);
1229 match->pattern = pattern;
1230
1231 Py_INCREF(string);
1232 match->string = string;
1233
1234 match->groups = pattern->groups+1;
1235
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001236 base = (char*) state->beginning;
1237 n = state->charsize;
1238
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001239 /* group zero */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001240 match->mark[0] = ((char*) state->start - base) / n;
1241 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001242
1243 /* fill in the rest of the groups */
1244 for (i = j = 0; i < pattern->groups; i++, j+=2)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001245 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1246 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1247 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001248 } else
1249 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1250
1251 return (PyObject*) match;
1252
1253 } else if (status < 0) {
1254
1255 /* internal error */
1256 PyErr_SetString(
1257 PyExc_RuntimeError, "internal error in regular expression engine"
1258 );
1259 return NULL;
1260
1261 }
1262
1263 Py_INCREF(Py_None);
1264 return Py_None;
1265}
1266
1267static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001268pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001269{
1270 /* create search state object */
1271
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001272 ScannerObject* self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001273 PyObject* string;
1274
1275 /* create match object (with room for extra group marks) */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001276 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001277 if (self == NULL)
1278 return NULL;
1279
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001280 string = state_init(&self->state, pattern, args);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001281 if (!string) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001282 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001283 return NULL;
1284 }
1285
1286 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001287 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001288
1289 Py_INCREF(string);
1290 self->string = string;
1291
1292 return (PyObject*) self;
1293}
1294
Guido van Rossumb700df92000-03-31 14:59:30 +00001295static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001296pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001297{
1298 Py_XDECREF(self->code);
1299 Py_XDECREF(self->pattern);
1300 Py_XDECREF(self->groupindex);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001301 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001302}
1303
1304static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001305pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001306{
1307 SRE_STATE state;
1308 PyObject* string;
1309 int status;
1310
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001311 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001312 if (!string)
1313 return NULL;
1314
1315 state.ptr = state.start;
1316
1317 if (state.charsize == 1) {
1318 status = sre_match(&state, PatternObject_GetCode(self));
1319 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001320#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001321 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001322#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001323 }
1324
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001325 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001326
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001327 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001328}
1329
1330static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001331pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001332{
1333 SRE_STATE state;
1334 PyObject* string;
1335 int status;
1336
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001337 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001338 if (!string)
1339 return NULL;
1340
1341 if (state.charsize == 1) {
1342 status = sre_search(&state, PatternObject_GetCode(self));
1343 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001344#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001345 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001346#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001347 }
1348
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001349 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001350
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001351 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001352}
1353
1354static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001355call(char* function, PyObject* args)
1356{
1357 PyObject* name;
1358 PyObject* module;
1359 PyObject* func;
1360 PyObject* result;
1361
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001362 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001363 if (!name)
1364 return NULL;
1365 module = PyImport_Import(name);
1366 Py_DECREF(name);
1367 if (!module)
1368 return NULL;
1369 func = PyObject_GetAttrString(module, function);
1370 Py_DECREF(module);
1371 if (!func)
1372 return NULL;
1373 result = PyObject_CallObject(func, args);
1374 Py_DECREF(func);
1375 Py_DECREF(args);
1376 return result;
1377}
1378
1379static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001380pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001381{
1382 PyObject* template;
1383 PyObject* string;
1384 PyObject* count;
1385 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1386 return NULL;
1387
1388 /* delegate to Python code */
1389 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1390}
1391
1392static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001393pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001394{
1395 PyObject* template;
1396 PyObject* string;
1397 PyObject* count;
1398 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1399 return NULL;
1400
1401 /* delegate to Python code */
1402 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1403}
1404
1405static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001406pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001407{
1408 PyObject* string;
1409 PyObject* maxsplit;
1410 if (!PyArg_ParseTuple(args, "OO", &string, &maxsplit))
1411 return NULL;
1412
1413 /* delegate to Python code */
1414 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1415}
1416
1417static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001418pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001419{
Guido van Rossumb700df92000-03-31 14:59:30 +00001420 SRE_STATE state;
1421 PyObject* string;
1422 PyObject* list;
1423 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001424 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001425
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001426 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001427 if (!string)
1428 return NULL;
1429
1430 list = PyList_New(0);
1431
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001432 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001433
1434 PyObject* item;
1435
1436 state.ptr = state.start;
1437
1438 if (state.charsize == 1) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001439 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +00001440 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001441#if defined(HAVE_UNICODE)
1442 status = sre_usearch(&state, PatternObject_GetCode(self));
1443#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001444 }
1445
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001446 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001447
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001448 /* don't bother to build a match object */
1449 switch (self->groups) {
1450 case 0:
1451 item = PySequence_GetSlice(
1452 string,
1453 ((char*) state.start - (char*) state.beginning) /
1454 state.charsize,
1455 ((char*) state.ptr - (char*) state.beginning) /
1456 state.charsize);
1457 if (!item)
1458 goto error;
1459 break;
1460 case 1:
1461 item = state_getslice(&state, 1, string);
1462 if (!item)
1463 goto error;
1464 break;
1465 default:
1466 item = PyTuple_New(self->groups);
1467 if (!item)
1468 goto error;
1469 for (i = 0; i < self->groups; i++) {
1470 PyObject* o = state_getslice(&state, i+1, string);
1471 if (!o) {
1472 Py_DECREF(item);
1473 goto error;
1474 }
1475 PyTuple_SET_ITEM(item, i, o);
1476 }
1477 break;
1478 }
1479
1480 if (PyList_Append(list, item) < 0) {
1481 Py_DECREF(item);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001482 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001483 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001484
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001485 if (state.ptr == state.start)
1486 state.start = (void*) ((char*) state.ptr + state.charsize);
1487 else
1488 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001489
1490 } else {
1491
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001492 if (status == 0)
1493 break;
1494
Guido van Rossumb700df92000-03-31 14:59:30 +00001495 /* internal error */
1496 PyErr_SetString(
1497 PyExc_RuntimeError,
1498 "internal error in regular expression engine"
1499 );
1500 goto error;
1501
1502 }
1503 }
1504
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001505 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001506 return list;
1507
1508error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001509 Py_DECREF(list);
1510 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001511 return NULL;
1512
1513}
1514
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001515static PyMethodDef pattern_methods[] = {
1516 {"match", (PyCFunction) pattern_match, 1},
1517 {"search", (PyCFunction) pattern_search, 1},
1518 {"sub", (PyCFunction) pattern_sub, 1},
1519 {"subn", (PyCFunction) pattern_subn, 1},
1520 {"split", (PyCFunction) pattern_split, 1},
1521 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001522 /* experimental */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001523 {"scanner", (PyCFunction) pattern_scanner, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001524 {NULL, NULL}
1525};
1526
1527static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001528pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001529{
1530 PyObject* res;
1531
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001532 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001533
1534 if (res)
1535 return res;
1536
1537 PyErr_Clear();
1538
1539 /* attributes */
1540 if (!strcmp(name, "pattern")) {
1541 Py_INCREF(self->pattern);
1542 return self->pattern;
1543 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001544
1545 if (!strcmp(name, "flags"))
1546 return Py_BuildValue("i", self->flags);
1547
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001548 if (!strcmp(name, "groups"))
1549 return Py_BuildValue("i", self->groups);
1550
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001551 if (!strcmp(name, "groupindex") && self->groupindex) {
1552 Py_INCREF(self->groupindex);
1553 return self->groupindex;
1554 }
1555
Guido van Rossumb700df92000-03-31 14:59:30 +00001556 PyErr_SetString(PyExc_AttributeError, name);
1557 return NULL;
1558}
1559
1560statichere PyTypeObject Pattern_Type = {
1561 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001562 0, "SRE_Pattern", sizeof(PatternObject), 0,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001563 (destructor)pattern_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001564 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001565 (getattrfunc)pattern_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001566};
1567
1568/* -------------------------------------------------------------------- */
1569/* match methods */
1570
1571static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001572match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001573{
1574 Py_XDECREF(self->string);
1575 Py_DECREF(self->pattern);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001576 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001577}
1578
1579static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001580match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001581{
1582 if (index < 0 || index >= self->groups) {
1583 /* raise IndexError if we were given a bad group number */
1584 PyErr_SetString(
1585 PyExc_IndexError,
1586 "no such group"
1587 );
1588 return NULL;
1589 }
1590
1591 if (self->string == Py_None || self->mark[index+index] < 0) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001592 /* return default value if the string or group is undefined */
1593 Py_INCREF(def);
1594 return def;
Guido van Rossumb700df92000-03-31 14:59:30 +00001595 }
1596
1597 return PySequence_GetSlice(
1598 self->string, self->mark[index+index], self->mark[index+index+1]
1599 );
1600}
1601
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001602static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001603match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001604{
1605 if (!PyInt_Check(index) && self->pattern->groupindex != NULL) {
1606 /* FIXME: resource leak? */
1607 index = PyObject_GetItem(self->pattern->groupindex, index);
1608 if (!index)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001609 return -1;
Guido van Rossumb700df92000-03-31 14:59:30 +00001610 }
1611
1612 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001613 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001614
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001615 return -1;
1616}
1617
1618static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001619match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001620{
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001621 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001622}
1623
1624static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001625match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001626{
1627 PyObject* result;
1628 int i, size;
1629
1630 size = PyTuple_GET_SIZE(args);
1631
1632 switch (size) {
1633 case 0:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001634 result = match_getslice(self, Py_False, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001635 break;
1636 case 1:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001637 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001638 break;
1639 default:
1640 /* fetch multiple items */
1641 result = PyTuple_New(size);
1642 if (!result)
1643 return NULL;
1644 for (i = 0; i < size; i++) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001645 PyObject* item = match_getslice(
1646 self, PyTuple_GET_ITEM(args, i), Py_None
1647 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001648 if (!item) {
1649 Py_DECREF(result);
1650 return NULL;
1651 }
1652 PyTuple_SET_ITEM(result, i, item);
1653 }
1654 break;
1655 }
1656 return result;
1657}
1658
1659static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001660match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001661{
1662 PyObject* result;
1663 int index;
1664
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001665 PyObject* def = Py_None;
1666 if (!PyArg_ParseTuple(args, "|O", &def))
1667 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001668
Guido van Rossumb700df92000-03-31 14:59:30 +00001669 result = PyTuple_New(self->groups-1);
1670 if (!result)
1671 return NULL;
1672
1673 for (index = 1; index < self->groups; index++) {
1674 PyObject* item;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001675 item = match_getslice_by_index(self, index, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001676 if (!item) {
1677 Py_DECREF(result);
1678 return NULL;
1679 }
1680 PyTuple_SET_ITEM(result, index-1, item);
1681 }
1682
1683 return result;
1684}
1685
1686static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001687match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001688{
1689 PyObject* result;
1690 PyObject* keys;
1691 int index;
1692
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001693 PyObject* def = Py_None;
1694 if (!PyArg_ParseTuple(args, "|O", &def))
1695 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001696
Guido van Rossumb700df92000-03-31 14:59:30 +00001697 result = PyDict_New();
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001698 if (!result || !self->pattern->groupindex)
Guido van Rossumb700df92000-03-31 14:59:30 +00001699 return result;
1700
1701 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001702 if (!keys) {
1703 Py_DECREF(result);
Guido van Rossumb700df92000-03-31 14:59:30 +00001704 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001705 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001706
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001707 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001708 PyObject* key;
1709 PyObject* item;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001710 key = PyList_GET_ITEM(keys, index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001711 if (!key) {
1712 Py_DECREF(keys);
1713 Py_DECREF(result);
1714 return NULL;
1715 }
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001716 item = match_getslice(self, key, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001717 if (!item) {
1718 Py_DECREF(key);
1719 Py_DECREF(keys);
1720 Py_DECREF(result);
1721 return NULL;
1722 }
1723 /* FIXME: <fl> this can fail, right? */
1724 PyDict_SetItem(result, key, item);
1725 }
1726
1727 Py_DECREF(keys);
1728
1729 return result;
1730}
1731
1732static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001733match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001734{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001735 int index;
1736
1737 PyObject* index_ = Py_False;
1738 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001739 return NULL;
1740
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001741 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001742
Guido van Rossumb700df92000-03-31 14:59:30 +00001743 if (index < 0 || index >= self->groups) {
1744 PyErr_SetString(
1745 PyExc_IndexError,
1746 "no such group"
1747 );
1748 return NULL;
1749 }
1750
1751 if (self->mark[index*2] < 0) {
1752 Py_INCREF(Py_None);
1753 return Py_None;
1754 }
1755
1756 return Py_BuildValue("i", self->mark[index*2]);
1757}
1758
1759static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001760match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001761{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001762 int index;
1763
1764 PyObject* index_ = Py_False;
1765 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001766 return NULL;
1767
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001768 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001769
Guido van Rossumb700df92000-03-31 14:59:30 +00001770 if (index < 0 || index >= self->groups) {
1771 PyErr_SetString(
1772 PyExc_IndexError,
1773 "no such group"
1774 );
1775 return NULL;
1776 }
1777
1778 if (self->mark[index*2] < 0) {
1779 Py_INCREF(Py_None);
1780 return Py_None;
1781 }
1782
1783 return Py_BuildValue("i", self->mark[index*2+1]);
1784}
1785
1786static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001787match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001788{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001789 int index;
1790
1791 PyObject* index_ = Py_False;
1792 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001793 return NULL;
1794
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001795 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001796
Guido van Rossumb700df92000-03-31 14:59:30 +00001797 if (index < 0 || index >= self->groups) {
1798 PyErr_SetString(
1799 PyExc_IndexError,
1800 "no such group"
1801 );
1802 return NULL;
1803 }
1804
1805 if (self->mark[index*2] < 0) {
1806 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001807 Py_INCREF(Py_None);
1808 return Py_BuildValue("OO", Py_None, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001809 }
1810
1811 return Py_BuildValue("ii", self->mark[index*2], self->mark[index*2+1]);
1812}
1813
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001814static PyMethodDef match_methods[] = {
1815 {"group", (PyCFunction) match_group, 1},
1816 {"start", (PyCFunction) match_start, 1},
1817 {"end", (PyCFunction) match_end, 1},
1818 {"span", (PyCFunction) match_span, 1},
1819 {"groups", (PyCFunction) match_groups, 1},
1820 {"groupdict", (PyCFunction) match_groupdict, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001821 {NULL, NULL}
1822};
1823
1824static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001825match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001826{
1827 PyObject* res;
1828
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001829 res = Py_FindMethod(match_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001830 if (res)
1831 return res;
1832
1833 PyErr_Clear();
1834
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001835 /* attributes */
Guido van Rossumb700df92000-03-31 14:59:30 +00001836 if (!strcmp(name, "string")) {
1837 Py_INCREF(self->string);
1838 return self->string;
1839 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001840
Guido van Rossumb700df92000-03-31 14:59:30 +00001841 if (!strcmp(name, "re")) {
1842 Py_INCREF(self->pattern);
1843 return (PyObject*) self->pattern;
1844 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001845
Guido van Rossumb700df92000-03-31 14:59:30 +00001846 if (!strcmp(name, "pos"))
1847 return Py_BuildValue("i", 0); /* FIXME */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001848
Guido van Rossumb700df92000-03-31 14:59:30 +00001849 if (!strcmp(name, "endpos"))
1850 return Py_BuildValue("i", 0); /* FIXME */
1851
1852 PyErr_SetString(PyExc_AttributeError, name);
1853 return NULL;
1854}
1855
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
1858
1859statichere PyTypeObject Match_Type = {
1860 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001861 0, "SRE_Match",
Guido van Rossumb700df92000-03-31 14:59:30 +00001862 sizeof(MatchObject), /* size of basic object */
1863 sizeof(int), /* space for group item */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001864 (destructor)match_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001865 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001866 (getattrfunc)match_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001867};
1868
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001869/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001870/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001871
1872static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001873scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001874{
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001875 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001876 Py_DECREF(self->string);
1877 Py_DECREF(self->pattern);
1878 PyMem_DEL(self);
1879}
1880
1881static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001882scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001883{
1884 SRE_STATE* state = &self->state;
1885 PyObject* match;
1886 int status;
1887
1888 state->ptr = state->start;
1889
1890 if (state->charsize == 1) {
1891 status = sre_match(state, PatternObject_GetCode(self->pattern));
1892 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001893#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001894 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001895#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001896 }
1897
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001898 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001899 state, self->string, status);
1900
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001901 if (status == 0 || state->ptr == state->start)
1902 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001903 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001904 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001905
1906 return match;
1907}
1908
1909
1910static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001911scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001912{
1913 SRE_STATE* state = &self->state;
1914 PyObject* match;
1915 int status;
1916
1917 state->ptr = state->start;
1918
1919 if (state->charsize == 1) {
1920 status = sre_search(state, PatternObject_GetCode(self->pattern));
1921 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001922#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001923 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001924#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001925 }
1926
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001927 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001928 state, self->string, status);
1929
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001930 if (status == 0 || state->ptr == state->start)
1931 state->start = (void*) ((char*) state->ptr + state->charsize);
1932 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001933 state->start = state->ptr;
1934
1935 return match;
1936}
1937
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001938static PyMethodDef scanner_methods[] = {
1939 {"match", (PyCFunction) scanner_match, 0},
1940 {"search", (PyCFunction) scanner_search, 0},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001941 {NULL, NULL}
1942};
1943
1944static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001945scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001946{
1947 PyObject* res;
1948
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001949 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001950 if (res)
1951 return res;
1952
1953 PyErr_Clear();
1954
1955 /* attributes */
1956 if (!strcmp(name, "pattern")) {
1957 Py_INCREF(self->pattern);
1958 return self->pattern;
1959 }
1960
1961 PyErr_SetString(PyExc_AttributeError, name);
1962 return NULL;
1963}
1964
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001965statichere PyTypeObject Scanner_Type = {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001966 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001967 0, "SRE_Scanner",
1968 sizeof(ScannerObject), /* size of basic object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001969 0,
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001970 (destructor)scanner_dealloc, /*tp_dealloc*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001971 0, /*tp_print*/
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001972 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001973};
1974
Guido van Rossumb700df92000-03-31 14:59:30 +00001975static PyMethodDef _functions[] = {
1976 {"compile", _compile, 1},
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001977 {"getcodesize", sre_codesize, 1},
Fredrik Lundhb389df32000-06-29 12:48:37 +00001978 {"getlower", sre_getlower, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001979 {NULL, NULL}
1980};
1981
1982void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001983#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00001984__declspec(dllexport)
1985#endif
1986init_sre()
1987{
1988 /* Patch object types */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001989 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001990 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001991
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001992 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00001993}
1994
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001995#endif /* !defined(SRE_RECURSIVE) */