/* -*- Mode: C; tab-width: 4 -*-
 *
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 99-10-24 fl created (based on existing template matcher code)
 * 99-11-13 fl added categories, branching, and more (0.2)
 * 99-11-16 fl some tweaks to compile on non-Windows platforms
 * 99-12-18 fl non-literals, generic maximizing repeat (0.3)
 * 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
 * 00-03-06 fl first alpha, sort of (0.5)
 * 00-03-14 fl removed most compatibility stuff (0.6)
 * 00-05-10 fl towards third alpha (0.8.2)
 * 00-05-13 fl added experimental scanner stuff (0.8.3)
 * 00-05-27 fl final bug hunt (0.8.4)
 * 00-06-21 fl less bugs, more taste (0.8.5)
 * 00-06-25 fl major changes to better deal with nested repeats (0.9)
 * 00-06-28 fl fixed findall (0.9.1)
 * 00-06-29 fl fixed split, added more scanner features (0.9.2)
 * 00-06-30 fl tuning, fast search (0.9.3)
 * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
31
32#ifndef SRE_RECURSIVE
33
Fredrik Lundh43b3b492000-06-30 10:41:31 +000034char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000035
36#include "Python.h"
37
38#include "sre.h"
39
Guido van Rossumb700df92000-03-31 14:59:30 +000040#if defined(HAVE_LIMITS_H)
41#include <limits.h>
42#else
43#define INT_MAX 2147483647
44#endif
45
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000046#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000047
/* name of this module, minus the leading underscore */
#define MODULE "sre"

/* defining this one enables tracing */
#undef DEBUG

#if PY_VERSION_HEX >= 0x01060000
/* defining this enables unicode support (default under 1.6) */
#define HAVE_UNICODE
#endif

/* optional features */
#define USE_FAST_SEARCH

/* LOCAL(type) declares a file-private helper returning `type`, using
   the cheapest calling convention the compiler offers */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif

/* error codes (parenthesized so they survive any surrounding
   expression context) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE takes a fully parenthesized printf argument list, e.g.
   TRACE(("%d\n", x)); it expands to nothing unless DEBUG is defined */
#if defined(DEBUG)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* offset of ptr from the start of the target string, in characters.
   relies on a variable named `state` being in scope at the point of
   use */
#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000082/* -------------------------------------------------------------------- */
83/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000084
/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored in sre_char_info[] */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* per-character flag table for code points 0..127, 16 entries per row.
   the table is only ever read, so it is declared const */
static const char sre_char_info[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lower-case mapping for code points 0..127: the identity, except that
   65..90 ('A'..'Z') map to 97..122 ('a'..'z') */
static const char sre_char_lower[128] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93,
    94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
    126, 127
};

/* map ch to lower case using the table above; code points >= 128 are
   returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? sre_char_lower[ch] : ch);
}

/* each predicate yields the (non-zero) mask bit when ch has the
   property, and 0 otherwise -- including for any ch >= 128.  note that
   ch is evaluated more than once, so it must be free of side effects */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000126
/* locale-specific character predicates */

/* map ch to lower case with the C library's tolower(); code points
   >= 256 are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? tolower((ch)) : ch);
}

/* the <ctype.h> based predicates are only consulted for ch < 256;
   anything larger is reported as not having the property.  ch is
   evaluated more than once, so it must be free of side effects */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
138
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)
/* map ch to lower case via Py_UNICODE_TOLOWER.  ch is narrowed to
   Py_UNICODE before the lookup */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}
#define SRE_UNI_TO_LOWER(ch) Py_UNICODE_TOLOWER((Py_UNICODE)(ch))
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
/* NOTE(review): alnum is answered by <ctype.h> isalnum() for ch < 256
   and is 0 above that, so it is not a full unicode test -- confirm
   whether a unicode-aware predicate is available to replace it */
#define SRE_UNI_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
/* a word character is an alphanumeric (per the unicode-mode predicate
   above, not the ASCII-only SRE_IS_ALNUM) or an underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
153
Guido van Rossumb700df92000-03-31 14:59:30 +0000154LOCAL(int)
155sre_category(SRE_CODE category, unsigned int ch)
156{
157 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000158
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000159 case SRE_CATEGORY_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000160 return SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000161 case SRE_CATEGORY_NOT_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000162 return !SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000163 case SRE_CATEGORY_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000164 return SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000165 case SRE_CATEGORY_NOT_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000166 return !SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000167 case SRE_CATEGORY_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000168 return SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000169 case SRE_CATEGORY_NOT_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000170 return !SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000171 case SRE_CATEGORY_LINEBREAK:
172 return SRE_IS_LINEBREAK(ch);
173 case SRE_CATEGORY_NOT_LINEBREAK:
174 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000175
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000176 case SRE_CATEGORY_LOC_WORD:
177 return SRE_LOC_IS_WORD(ch);
178 case SRE_CATEGORY_LOC_NOT_WORD:
179 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000180
181#if defined(HAVE_UNICODE)
182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
198#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000199 }
200 return 0;
201}
202
203/* helpers */
204
205LOCAL(int)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000206stack_free(SRE_STATE* state)
Guido van Rossumb700df92000-03-31 14:59:30 +0000207{
208 if (state->stack) {
209 TRACE(("release stack\n"));
210 free(state->stack);
211 state->stack = NULL;
212 }
213 state->stacksize = 0;
214 return 0;
215}
216
217static int /* shouldn't be LOCAL */
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000218stack_extend(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000219{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000220 SRE_STACK* stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000221 int stacksize;
222
223 /* grow the stack to a suitable size; we need at least lo entries,
224 at most hi entries. if for some reason hi is lower than lo, lo
225 wins */
226
227 stacksize = state->stacksize;
228
229 if (stacksize == 0) {
230 /* create new stack */
231 stacksize = 512;
232 if (stacksize < lo)
233 stacksize = lo;
234 else if (stacksize > hi)
235 stacksize = hi;
236 TRACE(("allocate stack %d\n", stacksize));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000237 stack = malloc(sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000238 } else {
239 /* grow the stack (typically by a factor of two) */
240 while (stacksize < lo)
241 stacksize = 2 * stacksize;
242 /* FIXME: <fl> could trim size if it's larger than lo, and
243 much larger than hi */
244 TRACE(("grow stack to %d\n", stacksize));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000245 stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000246 }
247
248 if (!stack) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000249 stack_free(state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000250 return SRE_ERROR_MEMORY;
251 }
252
253 state->stack = stack;
254 state->stacksize = stacksize;
255
256 return 0;
257}
258
/* generate 8-bit version */

/* the matching engine further down is written against these names;
   they select the character type and the names of the generated
   functions */
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_MEMBER sre_member
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

/* include this file into itself.  with SRE_RECURSIVE defined, only the
   engine section is compiled, using the 8-bit settings above */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_MEMBER
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

/* these settings are the ones in effect when the engine section is
   reached for the second time, in the outer compilation pass */
#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_MEMBER sre_umember
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000287
288#endif /* SRE_RECURSIVE */
289
/* -------------------------------------------------------------------- */
/* String matching engine */

/* the following section is compiled twice, with different character
   settings */
295
296LOCAL(int)
297SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
298{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000299 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000300
301 int this, that;
302
303 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000304
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000305 case SRE_AT_BEGINNING:
Guido van Rossum29530882000-04-10 17:06:55 +0000306 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000307
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000308 case SRE_AT_BEGINNING_LINE:
309 return ((void*) ptr == state->beginning ||
310 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000311
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000312 case SRE_AT_END:
Guido van Rossum29530882000-04-10 17:06:55 +0000313 return ((void*) ptr == state->end);
Fredrik Lundh80946112000-06-29 18:03:25 +0000314
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000315 case SRE_AT_END_LINE:
316 return ((void*) ptr == state->end ||
317 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000318
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000319 case SRE_AT_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000320 if (state->beginning == state->end)
321 return 0;
322 that = ((void*) ptr > state->beginning) ?
323 SRE_IS_WORD((int) ptr[-1]) : 0;
324 this = ((void*) ptr < state->end) ?
325 SRE_IS_WORD((int) ptr[0]) : 0;
326 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000327
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000328 case SRE_AT_NON_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000329 if (state->beginning == state->end)
330 return 0;
331 that = ((void*) ptr > state->beginning) ?
332 SRE_IS_WORD((int) ptr[-1]) : 0;
333 this = ((void*) ptr < state->end) ?
334 SRE_IS_WORD((int) ptr[0]) : 0;
335 return this == that;
336 }
337
338 return 0;
339}
340
341LOCAL(int)
Fredrik Lundh0640e112000-06-30 13:55:15 +0000342SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000343{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000344 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000345
346 int ok = 1;
347
348 for (;;) {
349 switch (*set++) {
350
351 case SRE_OP_NEGATE:
352 ok = !ok;
353 break;
354
355 case SRE_OP_FAILURE:
356 return !ok;
357
358 case SRE_OP_LITERAL:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000359 if (ch == set[0])
Guido van Rossumb700df92000-03-31 14:59:30 +0000360 return ok;
361 set++;
362 break;
363
364 case SRE_OP_RANGE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000365 if (set[0] <= ch && ch <= set[1])
Guido van Rossumb700df92000-03-31 14:59:30 +0000366 return ok;
367 set += 2;
368 break;
369
370 case SRE_OP_CATEGORY:
371 if (sre_category(set[0], (int) ch))
372 return ok;
373 set += 1;
374 break;
375
376 default:
Fredrik Lundh80946112000-06-29 18:03:25 +0000377 /* internal error -- there's not much we can do about it
378 here, so let's just pretend it didn't match... */
Guido van Rossumb700df92000-03-31 14:59:30 +0000379 return 0;
380 }
381 }
382}
383
384LOCAL(int)
385SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
386{
387 /* check if string matches the given pattern. returns -1 for
388 error, 0 for failure, and 1 for success */
389
390 SRE_CHAR* end = state->end;
391 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000392 int stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000393 int stackbase;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000394 int lastmark;
Guido van Rossumb700df92000-03-31 14:59:30 +0000395 int i, count;
396
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000397 /* FIXME: this is a hack! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000398 void* mark_copy[SRE_MARK_SIZE];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000399 void* mark = NULL;
400
401 TRACE(("%8d: enter\n", PTR(ptr)));
402
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000403 if (pattern[0] == SRE_OP_INFO) {
404 /* optimization info block */
405 /* args: <1=skip> <2=flags> <3=min> ... */
406 if (pattern[3] && (end - ptr) < pattern[3]) {
407 TRACE(("reject (got %d chars, need %d)\n",
408 (end - ptr), pattern[3]));
409 return 0;
410 }
411 pattern += pattern[1] + 1;
412 }
413
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000414 stackbase = stack = state->stackbase;
415 lastmark = state->lastmark;
416
417 retry:
Guido van Rossumb700df92000-03-31 14:59:30 +0000418
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000419 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000420
421 switch (*pattern++) {
422
423 case SRE_OP_FAILURE:
424 /* immediate failure */
425 TRACE(("%8d: failure\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000426 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000427
428 case SRE_OP_SUCCESS:
429 /* end of pattern */
430 TRACE(("%8d: success\n", PTR(ptr)));
431 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000432 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000433
434 case SRE_OP_AT:
435 /* match at given position */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000436 /* args: <at> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000437 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
Guido van Rossumb700df92000-03-31 14:59:30 +0000438 if (!SRE_AT(state, ptr, *pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000439 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000440 pattern++;
441 break;
442
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000443 case SRE_OP_CATEGORY:
444 /* match at given category */
445 /* args: <category> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000446 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
447 *ptr, *pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000448 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
449 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000450 TRACE(("%8d: category ok\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000451 pattern++;
452 ptr++;
453 break;
454
Guido van Rossumb700df92000-03-31 14:59:30 +0000455 case SRE_OP_LITERAL:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000456 /* match literal string */
Guido van Rossumb700df92000-03-31 14:59:30 +0000457 /* args: <code> */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000458 TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
459 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000460 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000461 pattern++;
462 ptr++;
463 break;
464
465 case SRE_OP_NOT_LITERAL:
466 /* match anything that is not literal character */
467 /* args: <code> */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000468 TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
469 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000470 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000471 pattern++;
472 ptr++;
473 break;
474
475 case SRE_OP_ANY:
476 /* match anything */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000477 TRACE(("%8d: anything\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000478 if (ptr >= end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000479 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000480 ptr++;
481 break;
482
483 case SRE_OP_IN:
484 /* match set member (or non_member) */
485 /* args: <skip> <set> */
486 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
487 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000488 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000489 pattern += pattern[0];
490 ptr++;
491 break;
492
493 case SRE_OP_GROUP:
494 /* match backreference */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000495 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000496 i = pattern[0];
497 {
Guido van Rossumb700df92000-03-31 14:59:30 +0000498 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
499 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
500 if (!p || !e || e < p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000501 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000502 while (p < e) {
503 if (ptr >= end || *ptr != *p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000504 goto failure;
505 p++; ptr++;
506 }
507 }
508 pattern++;
509 break;
510
511 case SRE_OP_GROUP_IGNORE:
512 /* match backreference */
513 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
514 i = pattern[0];
515 {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000516 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
517 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000518 if (!p || !e || e < p)
519 goto failure;
520 while (p < e) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000521 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000522 state->lower(*ptr) != state->lower(*p))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000523 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000524 p++; ptr++;
525 }
526 }
527 pattern++;
528 break;
529
530 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000531 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000532 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000533 state->lower(*ptr) != state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000534 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000535 pattern++;
536 ptr++;
537 break;
538
539 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000540 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000541 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000542 state->lower(*ptr) == state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000543 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000544 pattern++;
545 ptr++;
546 break;
547
548 case SRE_OP_IN_IGNORE:
549 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
550 if (ptr >= end
Fredrik Lundh0640e112000-06-30 13:55:15 +0000551 || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000552 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000553 pattern += pattern[0];
554 ptr++;
555 break;
556
557 case SRE_OP_MARK:
558 /* set mark */
559 /* args: <mark> */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000560 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
561 if (state->lastmark < pattern[0])
562 state->lastmark = pattern[0];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000563 if (!mark) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000564 mark = mark_copy;
565 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000566 }
567 state->mark[pattern[0]] = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000568 pattern++;
569 break;
570
571 case SRE_OP_JUMP:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000572 case SRE_OP_INFO:
Guido van Rossumb700df92000-03-31 14:59:30 +0000573 /* jump forward */
574 /* args: <skip> */
575 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
576 pattern += pattern[0];
577 break;
578
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000579 case SRE_OP_ASSERT:
580 /* assert subpattern */
Guido van Rossumb700df92000-03-31 14:59:30 +0000581 /* args: <skip> <pattern> */
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000582 TRACE(("%8d: assert subpattern\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000583 state->ptr = ptr;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000584 i = SRE_MATCH(state, pattern + 1);
585 if (i < 0)
586 return i;
587 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000588 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000589 pattern += pattern[0];
Guido van Rossumb700df92000-03-31 14:59:30 +0000590 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000591
592 case SRE_OP_ASSERT_NOT:
593 /* assert not subpattern */
594 /* args: <skip> <pattern> */
595 TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
596 state->ptr = ptr;
597 i = SRE_MATCH(state, pattern + 1);
598 if (i < 0)
599 return i;
600 if (i)
601 goto failure;
602 pattern += pattern[0];
603 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000604
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000605#if 0
Guido van Rossumb700df92000-03-31 14:59:30 +0000606 case SRE_OP_MAX_REPEAT_ONE:
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000607 /* match repeated sequence (maximizing regexp) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000608
609 /* this operator only works if the repeated item is
610 exactly one character wide, and we're not already
611 collecting backtracking points. for other cases,
612 use the MAX_REPEAT operator instead */
613
Guido van Rossumb700df92000-03-31 14:59:30 +0000614 /* args: <skip> <min> <max> <step> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000615 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
616 pattern[1], pattern[2]));
617
618 count = 0;
619
620 if (pattern[3] == SRE_OP_ANY) {
621 /* repeated wildcard. skip to the end of the target
622 string, and backtrack from there */
623 /* FIXME: must look for line endings */
624 if (ptr + pattern[1] > end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000625 goto failure; /* cannot match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000626 count = pattern[2];
627 if (count > end - ptr)
628 count = end - ptr;
629 ptr += count;
630
631 } else if (pattern[3] == SRE_OP_LITERAL) {
632 /* repeated literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000633 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000634 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000635 if (ptr >= end || (SRE_CODE) ptr[0] != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000636 break;
637 ptr++;
638 count++;
639 }
640
641 } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
642 /* repeated literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000643 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000644 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000645 if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000646 break;
647 ptr++;
648 count++;
649 }
650
651 } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
652 /* repeated non-literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000653 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000654 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000655 if (ptr >= end || (SRE_CODE) ptr[0] == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000656 break;
657 ptr++;
658 count++;
659 }
660
661 } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
662 /* repeated non-literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000663 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000664 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000665 if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000666 break;
667 ptr++;
668 count++;
669 }
670
671 } else if (pattern[3] == SRE_OP_IN) {
672 /* repeated set */
673 while (count < (int) pattern[2]) {
674 if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
675 break;
676 ptr++;
677 count++;
678 }
679
680 } else {
681 /* repeated single character pattern */
682 state->ptr = ptr;
683 while (count < (int) pattern[2]) {
684 i = SRE_MATCH(state, pattern + 3);
685 if (i < 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000686 return i;
687 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000688 break;
689 count++;
690 }
691 state->ptr = ptr;
692 ptr += count;
693 }
694
695 /* when we arrive here, count contains the number of
696 matches, and ptr points to the tail of the target
697 string. check if the rest of the pattern matches, and
698 backtrack if not. */
699
Guido van Rossumb700df92000-03-31 14:59:30 +0000700 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
701
702 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000703 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000704
705 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
706 /* tail is empty. we're finished */
707 TRACE(("%8d: tail is empty\n", PTR(ptr)));
708 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000709 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000710
711 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000712 /* tail starts with a literal. skip positions where
713 the rest of the pattern cannot possibly match */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000714 SRE_CODE chr = pattern[pattern[0]+1];
Guido van Rossumb700df92000-03-31 14:59:30 +0000715 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
716 for (;;) {
717 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
718 while (count >= (int) pattern[1] &&
719 (ptr >= end || *ptr != chr)) {
720 ptr--;
721 count--;
722 }
723 TRACE(("%8d: check tail\n", PTR(ptr)));
724 if (count < (int) pattern[1])
725 break;
726 state->ptr = ptr;
727 i = SRE_MATCH(state, pattern + pattern[0]);
728 if (i > 0) {
729 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000731 }
732 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
733 ptr--;
734 count--;
735 }
736
737 } else {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000738 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +0000739 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
740 while (count >= (int) pattern[1]) {
741 state->ptr = ptr;
742 i = SRE_MATCH(state, pattern + pattern[0]);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000743 if (i < 0)
744 return i;
745 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000746 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000747 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000748 }
749 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
750 ptr--;
751 count--;
752 }
753 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000754 goto failure;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000755#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000756
757 case SRE_OP_MAX_REPEAT:
758 /* match repeated sequence (maximizing regexp). repeated
759 group should end with a MAX_UNTIL code */
760
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000761 /* args: <skip> <min> <max> <item> */
762
763 TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
Guido van Rossumb700df92000-03-31 14:59:30 +0000764 pattern[1], pattern[2]));
765
766 count = 0;
767 state->ptr = ptr;
768
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000769 /* match minimum number of items */
770 while (count < (int) pattern[1]) {
771 i = SRE_MATCH(state, pattern + 3);
772 if (i < 0)
773 return i;
774 if (!i)
775 goto failure;
776 if (state->ptr == ptr) {
777 /* if the match was successful but empty, set the
778 count to max and terminate the scanning loop */
779 count = (int) pattern[2];
780 break;
781 }
782 count++;
783 ptr = state->ptr;
784 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000785
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000786 TRACE(("%8d: found %d leading items\n", PTR(ptr), count));
Guido van Rossumb700df92000-03-31 14:59:30 +0000787
788 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000789 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000790
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000791 /* match maximum number of items, pushing alternate end
792 points to the stack */
Guido van Rossumb700df92000-03-31 14:59:30 +0000793
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000794 while (pattern[2] == 32767 || count < (int) pattern[2]) {
795 state->stackbase = stack;
796 i = SRE_MATCH(state, pattern + 3);
797 state->stackbase = stackbase; /* rewind */
798 if (i < 0)
799 return i;
800 if (!i)
801 break;
802 if (state->ptr == ptr) {
803 count = (int) pattern[2];
804 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000805 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000806 /* this position was valid; add it to the retry
807 stack */
808 if (stack >= state->stacksize) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000809 i = stack_extend(state, stack + 1,
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000810 stackbase + pattern[2]);
811 if (i < 0)
812 return i; /* out of memory */
813 }
814 TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
815 state->stack[stack].ptr = ptr;
816 state->stack[stack].pattern = pattern + pattern[0];
817 stack++;
818 /* move forward */
819 ptr = state->ptr;
820 count++;
Guido van Rossumb700df92000-03-31 14:59:30 +0000821 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000822
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000823 /* when we get here, count is the number of successful
824 matches, and ptr points to the tail. */
Guido van Rossumb700df92000-03-31 14:59:30 +0000825
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000826 TRACE(("%8d: skip +%d\n", PTR(ptr), pattern[0]));
827
828 pattern += pattern[0];
829 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000830
831 case SRE_OP_MIN_REPEAT:
832 /* match repeated sequence (minimizing regexp) */
833 TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
834 pattern[1], pattern[2]));
835 count = 0;
836 state->ptr = ptr;
837 /* match minimum number of items */
838 while (count < (int) pattern[1]) {
839 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000840 if (i < 0)
841 return i;
842 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000843 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000844 count++;
845 }
846 /* move forward until the tail matches. */
847 while (count <= (int) pattern[2]) {
848 ptr = state->ptr;
849 i = SRE_MATCH(state, pattern + pattern[0]);
850 if (i > 0) {
851 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000852 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000853 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000854 state->ptr = ptr; /* backtrack */
855 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000856 if (i < 0)
857 return i;
858 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000859 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000860 count++;
861 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000862 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000863
Guido van Rossumb700df92000-03-31 14:59:30 +0000864 case SRE_OP_BRANCH:
865 /* match one of several subpatterns */
866 /* format: <branch> <size> <head> ... <null> <tail> */
867 TRACE(("%8d: branch\n", PTR(ptr)));
868 while (*pattern) {
869 if (pattern[1] != SRE_OP_LITERAL ||
Fredrik Lundh0640e112000-06-30 13:55:15 +0000870 (ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000871 TRACE(("%8d: branch check\n", PTR(ptr)));
872 state->ptr = ptr;
873 i = SRE_MATCH(state, pattern + 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000874 if (i < 0)
875 return i;
876 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000877 TRACE(("%8d: branch succeeded\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000878 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000879 }
880 }
881 pattern += *pattern;
882 }
883 TRACE(("%8d: branch failed\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000884 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000885
886 case SRE_OP_REPEAT:
887 /* TEMPLATE: match repeated sequence (no backtracking) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000888 /* args: <skip> <min> <max> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000889 TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
890 count = 0;
891 state->ptr = ptr;
892 while (count < (int) pattern[2]) {
893 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000894 if (i < 0)
895 return i;
896 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000897 break;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000898 if (state->ptr == ptr) {
899 count = (int) pattern[2];
900 break;
901 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000902 count++;
903 }
904 if (count <= (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000905 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000906 TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
907 pattern += pattern[0];
908 ptr = state->ptr;
909 break;
910
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000911 default:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000912 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000913 return SRE_ERROR_ILLEGAL;
914 }
915 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000916
917 failure:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000918 if (stack-- > stackbase) {
919 ptr = state->stack[stack].ptr;
920 pattern = state->stack[stack].pattern;
921 TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
922 goto retry;
923 }
924 TRACE(("%8d: leave (failure)\n", PTR(ptr)));
925 state->stackbase = stackbase;
926 state->lastmark = lastmark;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000927 if (mark)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000928 memcpy(state->mark, mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000929 return 0;
930
931 success:
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000932 TRACE(("%8d: leave (success)\n", PTR(ptr)));
933 state->stackbase = stackbase;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000934 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000935}
936
937LOCAL(int)
938SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
939{
940 SRE_CHAR* ptr = state->start;
941 SRE_CHAR* end = state->end;
942 int status = 0;
Fredrik Lundh80946112000-06-29 18:03:25 +0000943 int prefix_len = 0;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000944 SRE_CODE* prefix;
945 SRE_CODE* overlap;
946 int literal = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000947
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000948 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000949 /* optimization info block */
950 /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
951
952 if (pattern[3] > 0) {
953 /* adjust end point (but make sure we leave at least one
954 character in there) */
955 end -= pattern[3]-1;
956 if (end <= ptr)
957 end = ptr+1;
958 }
959
960 literal = pattern[2];
961
962 prefix = pattern + 6;
963 prefix_len = pattern[5];
964
965 overlap = prefix + prefix_len - 1;
966
967 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000968 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000969
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000970#if defined(USE_FAST_SEARCH)
971 if (prefix_len > 1) {
972 /* pattern starts with a known prefix. use the overlap
973 table to skip forward as fast as we possibly can */
974 int i = 0;
975 end = state->end;
976 while (ptr < end) {
977 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000978 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000979 if (!i)
980 break;
981 else
982 i = overlap[i];
983 } else {
984 if (++i == prefix_len) {
985 /* found a potential match */
986 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
987 state->start = ptr - prefix_len + 1;
988 state->ptr = ptr + 1;
989 if (literal)
990 return 1; /* all of it */
991 status = SRE_MATCH(state, pattern + 2*prefix_len);
992 if (status != 0)
993 return status;
994 /* close but no cigar -- try again */
995 i = overlap[i];
996 }
997 break;
998 }
999
1000 }
1001 ptr++;
1002 }
1003 return 0;
1004 }
1005#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001006
Guido van Rossumb700df92000-03-31 14:59:30 +00001007 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001008 /* pattern starts with a literal character. this is used for
1009 short prefixes, and if fast search is disabled*/
Fredrik Lundh0640e112000-06-30 13:55:15 +00001010 SRE_CODE chr = pattern[1];
Guido van Rossumb700df92000-03-31 14:59:30 +00001011 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001012 while (ptr < end && (SRE_CODE) ptr[0] != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +00001013 ptr++;
1014 if (ptr == end)
1015 return 0;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001016 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001017 state->start = ptr;
1018 state->ptr = ++ptr;
1019 status = SRE_MATCH(state, pattern + 2);
1020 if (status != 0)
1021 break;
1022 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001023 } else
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001024 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +00001025 while (ptr <= end) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001026 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001027 state->start = state->ptr = ptr++;
1028 status = SRE_MATCH(state, pattern);
1029 if (status != 0)
1030 break;
1031 }
1032
1033 return status;
1034}
1035
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001036#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001037
1038/* -------------------------------------------------------------------- */
1039/* factories and destructors */
1040
1041/* see sre.h for object declarations */
1042
1043staticforward PyTypeObject Pattern_Type;
1044staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001045staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001046
1047static PyObject *
1048_compile(PyObject* self_, PyObject* args)
1049{
1050 /* "compile" pattern descriptor to pattern object */
1051
1052 PatternObject* self;
1053
1054 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001055 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001056 PyObject* code;
1057 int groups = 0;
1058 PyObject* groupindex = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001059 if (!PyArg_ParseTuple(args, "OiO!|iO", &pattern, &flags,
1060 &PyString_Type, &code,
1061 &groups, &groupindex))
Guido van Rossumb700df92000-03-31 14:59:30 +00001062 return NULL;
1063
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001064 self = PyObject_NEW(PatternObject, &Pattern_Type);
Guido van Rossumb700df92000-03-31 14:59:30 +00001065 if (self == NULL)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001066
Guido van Rossumb700df92000-03-31 14:59:30 +00001067 return NULL;
1068
1069 Py_INCREF(pattern);
1070 self->pattern = pattern;
1071
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001072 self->flags = flags;
1073
Guido van Rossumb700df92000-03-31 14:59:30 +00001074 Py_INCREF(code);
1075 self->code = code;
1076
1077 self->groups = groups;
1078
1079 Py_XINCREF(groupindex);
1080 self->groupindex = groupindex;
1081
1082 return (PyObject*) self;
1083}
1084
1085static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001086sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001087{
1088 return Py_BuildValue("i", sizeof(SRE_CODE));
1089}
1090
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001091static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001092sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001093{
1094 int character, flags;
1095 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1096 return NULL;
1097 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001098 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001099#if defined(HAVE_UNICODE)
1100 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001101 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001102#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001103 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001104}
1105
Guido van Rossumb700df92000-03-31 14:59:30 +00001106LOCAL(PyObject*)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001107state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001108{
1109 /* prepare state object */
1110
1111 PyBufferProcs *buffer;
1112 int i, count;
1113 void* ptr;
1114
1115 PyObject* string;
1116 int start = 0;
1117 int end = INT_MAX;
1118 if (!PyArg_ParseTuple(args, "O|ii", &string, &start, &end))
1119 return NULL;
1120
1121 /* get pointer to string buffer */
1122 buffer = string->ob_type->tp_as_buffer;
1123 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1124 buffer->bf_getsegcount(string, NULL) != 1) {
1125 PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
1126 return NULL;
1127 }
1128
1129 /* determine buffer size */
1130 count = buffer->bf_getreadbuffer(string, 0, &ptr);
1131 if (count < 0) {
1132 /* sanity check */
1133 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1134 return NULL;
1135 }
1136
1137 /* determine character size */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001138#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001139 state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001140#else
1141 state->charsize = 1;
1142#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001143
1144 count /= state->charsize;
1145
1146 /* adjust boundaries */
1147 if (start < 0)
1148 start = 0;
1149 else if (start > count)
1150 start = count;
1151
1152 if (end < 0)
1153 end = 0;
1154 else if (end > count)
1155 end = count;
1156
1157 state->beginning = ptr;
1158
1159 state->start = (void*) ((char*) ptr + start * state->charsize);
1160 state->end = (void*) ((char*) ptr + end * state->charsize);
1161
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001162 state->lastmark = 0;
1163
Guido van Rossumb700df92000-03-31 14:59:30 +00001164 /* FIXME: dynamic! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001165 for (i = 0; i < SRE_MARK_SIZE; i++)
Guido van Rossumb700df92000-03-31 14:59:30 +00001166 state->mark[i] = NULL;
1167
1168 state->stack = NULL;
1169 state->stackbase = 0;
1170 state->stacksize = 0;
1171
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001172 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001173 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001174#if defined(HAVE_UNICODE)
1175 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001176 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001177#endif
1178 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001179 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001180
Guido van Rossumb700df92000-03-31 14:59:30 +00001181 return string;
1182}
1183
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001184LOCAL(void)
1185state_fini(SRE_STATE* state)
1186{
1187 stack_free(state);
1188}
1189
1190LOCAL(PyObject*)
1191state_getslice(SRE_STATE* state, int index, PyObject* string)
1192{
1193 index = (index - 1) * 2;
1194
1195 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1196 Py_INCREF(Py_None);
1197 return Py_None;
1198 }
1199
1200 return PySequence_GetSlice(
1201 string,
1202 ((char*)state->mark[index] - (char*)state->beginning) /
1203 state->charsize,
1204 ((char*)state->mark[index+1] - (char*)state->beginning) /
1205 state->charsize
1206 );
1207}
1208
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001209static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001210pattern_new_match(PatternObject* pattern, SRE_STATE* state,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001211 PyObject* string, int status)
1212{
1213 /* create match object (from state object) */
1214
1215 MatchObject* match;
1216 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001217 char* base;
1218 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001219
1220 if (status > 0) {
1221
1222 /* create match object (with room for extra group marks) */
1223 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2*pattern->groups);
1224 if (match == NULL)
1225 return NULL;
1226
1227 Py_INCREF(pattern);
1228 match->pattern = pattern;
1229
1230 Py_INCREF(string);
1231 match->string = string;
1232
1233 match->groups = pattern->groups+1;
1234
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001235 base = (char*) state->beginning;
1236 n = state->charsize;
1237
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001238 /* group zero */
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001239 match->mark[0] = ((char*) state->start - base) / n;
1240 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001241
1242 /* fill in the rest of the groups */
1243 for (i = j = 0; i < pattern->groups; i++, j+=2)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001244 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1245 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1246 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001247 } else
1248 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1249
1250 return (PyObject*) match;
1251
1252 } else if (status < 0) {
1253
1254 /* internal error */
1255 PyErr_SetString(
1256 PyExc_RuntimeError, "internal error in regular expression engine"
1257 );
1258 return NULL;
1259
1260 }
1261
1262 Py_INCREF(Py_None);
1263 return Py_None;
1264}
1265
1266static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001267pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001268{
1269 /* create search state object */
1270
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001271 ScannerObject* self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001272 PyObject* string;
1273
1274 /* create match object (with room for extra group marks) */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001275 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001276 if (self == NULL)
1277 return NULL;
1278
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001279 string = state_init(&self->state, pattern, args);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001280 if (!string) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001281 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001282 return NULL;
1283 }
1284
1285 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001286 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001287
1288 Py_INCREF(string);
1289 self->string = string;
1290
1291 return (PyObject*) self;
1292}
1293
Guido van Rossumb700df92000-03-31 14:59:30 +00001294static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001295pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001296{
1297 Py_XDECREF(self->code);
1298 Py_XDECREF(self->pattern);
1299 Py_XDECREF(self->groupindex);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001300 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001301}
1302
1303static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001304pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001305{
1306 SRE_STATE state;
1307 PyObject* string;
1308 int status;
1309
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001310 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001311 if (!string)
1312 return NULL;
1313
1314 state.ptr = state.start;
1315
1316 if (state.charsize == 1) {
1317 status = sre_match(&state, PatternObject_GetCode(self));
1318 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001319#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001320 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001321#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001322 }
1323
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001324 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001325
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001326 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001327}
1328
1329static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001330pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001331{
1332 SRE_STATE state;
1333 PyObject* string;
1334 int status;
1335
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001336 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001337 if (!string)
1338 return NULL;
1339
1340 if (state.charsize == 1) {
1341 status = sre_search(&state, PatternObject_GetCode(self));
1342 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001343#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001344 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001345#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001346 }
1347
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001348 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001349
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001350 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001351}
1352
1353static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001354call(char* function, PyObject* args)
1355{
1356 PyObject* name;
1357 PyObject* module;
1358 PyObject* func;
1359 PyObject* result;
1360
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001361 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001362 if (!name)
1363 return NULL;
1364 module = PyImport_Import(name);
1365 Py_DECREF(name);
1366 if (!module)
1367 return NULL;
1368 func = PyObject_GetAttrString(module, function);
1369 Py_DECREF(module);
1370 if (!func)
1371 return NULL;
1372 result = PyObject_CallObject(func, args);
1373 Py_DECREF(func);
1374 Py_DECREF(args);
1375 return result;
1376}
1377
1378static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001379pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001380{
1381 PyObject* template;
1382 PyObject* string;
1383 PyObject* count;
1384 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1385 return NULL;
1386
1387 /* delegate to Python code */
1388 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1389}
1390
1391static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001392pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001393{
1394 PyObject* template;
1395 PyObject* string;
1396 PyObject* count;
1397 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1398 return NULL;
1399
1400 /* delegate to Python code */
1401 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1402}
1403
1404static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001405pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001406{
1407 PyObject* string;
1408 PyObject* maxsplit;
1409 if (!PyArg_ParseTuple(args, "OO", &string, &maxsplit))
1410 return NULL;
1411
1412 /* delegate to Python code */
1413 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1414}
1415
1416static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001417pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001418{
Guido van Rossumb700df92000-03-31 14:59:30 +00001419 SRE_STATE state;
1420 PyObject* string;
1421 PyObject* list;
1422 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001423 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001424
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001425 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001426 if (!string)
1427 return NULL;
1428
1429 list = PyList_New(0);
1430
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001431 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001432
1433 PyObject* item;
1434
1435 state.ptr = state.start;
1436
1437 if (state.charsize == 1) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001438 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +00001439 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001440#if defined(HAVE_UNICODE)
1441 status = sre_usearch(&state, PatternObject_GetCode(self));
1442#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001443 }
1444
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001445 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001446
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001447 /* don't bother to build a match object */
1448 switch (self->groups) {
1449 case 0:
1450 item = PySequence_GetSlice(
1451 string,
1452 ((char*) state.start - (char*) state.beginning) /
1453 state.charsize,
1454 ((char*) state.ptr - (char*) state.beginning) /
1455 state.charsize);
1456 if (!item)
1457 goto error;
1458 break;
1459 case 1:
1460 item = state_getslice(&state, 1, string);
1461 if (!item)
1462 goto error;
1463 break;
1464 default:
1465 item = PyTuple_New(self->groups);
1466 if (!item)
1467 goto error;
1468 for (i = 0; i < self->groups; i++) {
1469 PyObject* o = state_getslice(&state, i+1, string);
1470 if (!o) {
1471 Py_DECREF(item);
1472 goto error;
1473 }
1474 PyTuple_SET_ITEM(item, i, o);
1475 }
1476 break;
1477 }
1478
1479 if (PyList_Append(list, item) < 0) {
1480 Py_DECREF(item);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001481 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001482 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001483
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001484 if (state.ptr == state.start)
1485 state.start = (void*) ((char*) state.ptr + state.charsize);
1486 else
1487 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001488
1489 } else {
1490
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001491 if (status == 0)
1492 break;
1493
Guido van Rossumb700df92000-03-31 14:59:30 +00001494 /* internal error */
1495 PyErr_SetString(
1496 PyExc_RuntimeError,
1497 "internal error in regular expression engine"
1498 );
1499 goto error;
1500
1501 }
1502 }
1503
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001504 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001505 return list;
1506
1507error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001508 Py_DECREF(list);
1509 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001510 return NULL;
1511
1512}
1513
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001514static PyMethodDef pattern_methods[] = {
1515 {"match", (PyCFunction) pattern_match, 1},
1516 {"search", (PyCFunction) pattern_search, 1},
1517 {"sub", (PyCFunction) pattern_sub, 1},
1518 {"subn", (PyCFunction) pattern_subn, 1},
1519 {"split", (PyCFunction) pattern_split, 1},
1520 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001521 /* experimental */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001522 {"scanner", (PyCFunction) pattern_scanner, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001523 {NULL, NULL}
1524};
1525
1526static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001527pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001528{
1529 PyObject* res;
1530
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001531 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001532
1533 if (res)
1534 return res;
1535
1536 PyErr_Clear();
1537
1538 /* attributes */
1539 if (!strcmp(name, "pattern")) {
1540 Py_INCREF(self->pattern);
1541 return self->pattern;
1542 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001543
1544 if (!strcmp(name, "flags"))
1545 return Py_BuildValue("i", self->flags);
1546
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001547 if (!strcmp(name, "groups"))
1548 return Py_BuildValue("i", self->groups);
1549
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001550 if (!strcmp(name, "groupindex") && self->groupindex) {
1551 Py_INCREF(self->groupindex);
1552 return self->groupindex;
1553 }
1554
Guido van Rossumb700df92000-03-31 14:59:30 +00001555 PyErr_SetString(PyExc_AttributeError, name);
1556 return NULL;
1557}
1558
1559statichere PyTypeObject Pattern_Type = {
1560 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001561 0, "SRE_Pattern", sizeof(PatternObject), 0,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001562 (destructor)pattern_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001563 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001564 (getattrfunc)pattern_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001565};
1566
1567/* -------------------------------------------------------------------- */
1568/* match methods */
1569
1570static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001571match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001572{
1573 Py_XDECREF(self->string);
1574 Py_DECREF(self->pattern);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001575 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001576}
1577
1578static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001579match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001580{
1581 if (index < 0 || index >= self->groups) {
1582 /* raise IndexError if we were given a bad group number */
1583 PyErr_SetString(
1584 PyExc_IndexError,
1585 "no such group"
1586 );
1587 return NULL;
1588 }
1589
1590 if (self->string == Py_None || self->mark[index+index] < 0) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001591 /* return default value if the string or group is undefined */
1592 Py_INCREF(def);
1593 return def;
Guido van Rossumb700df92000-03-31 14:59:30 +00001594 }
1595
1596 return PySequence_GetSlice(
1597 self->string, self->mark[index+index], self->mark[index+index+1]
1598 );
1599}
1600
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001601static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001602match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001603{
1604 if (!PyInt_Check(index) && self->pattern->groupindex != NULL) {
1605 /* FIXME: resource leak? */
1606 index = PyObject_GetItem(self->pattern->groupindex, index);
1607 if (!index)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001608 return -1;
Guido van Rossumb700df92000-03-31 14:59:30 +00001609 }
1610
1611 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001612 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001613
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001614 return -1;
1615}
1616
1617static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001618match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001619{
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001620 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001621}
1622
1623static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001624match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001625{
1626 PyObject* result;
1627 int i, size;
1628
1629 size = PyTuple_GET_SIZE(args);
1630
1631 switch (size) {
1632 case 0:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001633 result = match_getslice(self, Py_False, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001634 break;
1635 case 1:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001636 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001637 break;
1638 default:
1639 /* fetch multiple items */
1640 result = PyTuple_New(size);
1641 if (!result)
1642 return NULL;
1643 for (i = 0; i < size; i++) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001644 PyObject* item = match_getslice(
1645 self, PyTuple_GET_ITEM(args, i), Py_None
1646 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001647 if (!item) {
1648 Py_DECREF(result);
1649 return NULL;
1650 }
1651 PyTuple_SET_ITEM(result, i, item);
1652 }
1653 break;
1654 }
1655 return result;
1656}
1657
1658static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001659match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001660{
1661 PyObject* result;
1662 int index;
1663
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001664 PyObject* def = Py_None;
1665 if (!PyArg_ParseTuple(args, "|O", &def))
1666 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001667
Guido van Rossumb700df92000-03-31 14:59:30 +00001668 result = PyTuple_New(self->groups-1);
1669 if (!result)
1670 return NULL;
1671
1672 for (index = 1; index < self->groups; index++) {
1673 PyObject* item;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001674 item = match_getslice_by_index(self, index, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001675 if (!item) {
1676 Py_DECREF(result);
1677 return NULL;
1678 }
1679 PyTuple_SET_ITEM(result, index-1, item);
1680 }
1681
1682 return result;
1683}
1684
1685static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001686match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001687{
1688 PyObject* result;
1689 PyObject* keys;
1690 int index;
1691
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001692 PyObject* def = Py_None;
1693 if (!PyArg_ParseTuple(args, "|O", &def))
1694 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001695
Guido van Rossumb700df92000-03-31 14:59:30 +00001696 result = PyDict_New();
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001697 if (!result || !self->pattern->groupindex)
Guido van Rossumb700df92000-03-31 14:59:30 +00001698 return result;
1699
1700 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001701 if (!keys) {
1702 Py_DECREF(result);
Guido van Rossumb700df92000-03-31 14:59:30 +00001703 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001704 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001705
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001706 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001707 PyObject* key;
1708 PyObject* item;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001709 key = PyList_GET_ITEM(keys, index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001710 if (!key) {
1711 Py_DECREF(keys);
1712 Py_DECREF(result);
1713 return NULL;
1714 }
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001715 item = match_getslice(self, key, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001716 if (!item) {
1717 Py_DECREF(key);
1718 Py_DECREF(keys);
1719 Py_DECREF(result);
1720 return NULL;
1721 }
1722 /* FIXME: <fl> this can fail, right? */
1723 PyDict_SetItem(result, key, item);
1724 }
1725
1726 Py_DECREF(keys);
1727
1728 return result;
1729}
1730
1731static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001732match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001733{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001734 int index;
1735
1736 PyObject* index_ = Py_False;
1737 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001738 return NULL;
1739
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001740 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001741
Guido van Rossumb700df92000-03-31 14:59:30 +00001742 if (index < 0 || index >= self->groups) {
1743 PyErr_SetString(
1744 PyExc_IndexError,
1745 "no such group"
1746 );
1747 return NULL;
1748 }
1749
1750 if (self->mark[index*2] < 0) {
1751 Py_INCREF(Py_None);
1752 return Py_None;
1753 }
1754
1755 return Py_BuildValue("i", self->mark[index*2]);
1756}
1757
1758static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001759match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001760{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001761 int index;
1762
1763 PyObject* index_ = Py_False;
1764 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001765 return NULL;
1766
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001767 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001768
Guido van Rossumb700df92000-03-31 14:59:30 +00001769 if (index < 0 || index >= self->groups) {
1770 PyErr_SetString(
1771 PyExc_IndexError,
1772 "no such group"
1773 );
1774 return NULL;
1775 }
1776
1777 if (self->mark[index*2] < 0) {
1778 Py_INCREF(Py_None);
1779 return Py_None;
1780 }
1781
1782 return Py_BuildValue("i", self->mark[index*2+1]);
1783}
1784
1785static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001786match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001787{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001788 int index;
1789
1790 PyObject* index_ = Py_False;
1791 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001792 return NULL;
1793
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001794 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001795
Guido van Rossumb700df92000-03-31 14:59:30 +00001796 if (index < 0 || index >= self->groups) {
1797 PyErr_SetString(
1798 PyExc_IndexError,
1799 "no such group"
1800 );
1801 return NULL;
1802 }
1803
1804 if (self->mark[index*2] < 0) {
1805 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001806 Py_INCREF(Py_None);
1807 return Py_BuildValue("OO", Py_None, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001808 }
1809
1810 return Py_BuildValue("ii", self->mark[index*2], self->mark[index*2+1]);
1811}
1812
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001813static PyMethodDef match_methods[] = {
1814 {"group", (PyCFunction) match_group, 1},
1815 {"start", (PyCFunction) match_start, 1},
1816 {"end", (PyCFunction) match_end, 1},
1817 {"span", (PyCFunction) match_span, 1},
1818 {"groups", (PyCFunction) match_groups, 1},
1819 {"groupdict", (PyCFunction) match_groupdict, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001820 {NULL, NULL}
1821};
1822
1823static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001824match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001825{
1826 PyObject* res;
1827
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001828 res = Py_FindMethod(match_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001829 if (res)
1830 return res;
1831
1832 PyErr_Clear();
1833
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001834 /* attributes */
Guido van Rossumb700df92000-03-31 14:59:30 +00001835 if (!strcmp(name, "string")) {
1836 Py_INCREF(self->string);
1837 return self->string;
1838 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001839
Guido van Rossumb700df92000-03-31 14:59:30 +00001840 if (!strcmp(name, "re")) {
1841 Py_INCREF(self->pattern);
1842 return (PyObject*) self->pattern;
1843 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001844
Guido van Rossumb700df92000-03-31 14:59:30 +00001845 if (!strcmp(name, "pos"))
1846 return Py_BuildValue("i", 0); /* FIXME */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001847
Guido van Rossumb700df92000-03-31 14:59:30 +00001848 if (!strcmp(name, "endpos"))
1849 return Py_BuildValue("i", 0); /* FIXME */
1850
1851 PyErr_SetString(PyExc_AttributeError, name);
1852 return NULL;
1853}
1854
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
1857
1858statichere PyTypeObject Match_Type = {
1859 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001860 0, "SRE_Match",
Guido van Rossumb700df92000-03-31 14:59:30 +00001861 sizeof(MatchObject), /* size of basic object */
1862 sizeof(int), /* space for group item */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001863 (destructor)match_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001864 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001865 (getattrfunc)match_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001866};
1867
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001868/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001869/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001870
1871static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001872scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001873{
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001874 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001875 Py_DECREF(self->string);
1876 Py_DECREF(self->pattern);
1877 PyMem_DEL(self);
1878}
1879
1880static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001881scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001882{
1883 SRE_STATE* state = &self->state;
1884 PyObject* match;
1885 int status;
1886
1887 state->ptr = state->start;
1888
1889 if (state->charsize == 1) {
1890 status = sre_match(state, PatternObject_GetCode(self->pattern));
1891 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001892#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001893 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001894#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001895 }
1896
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001897 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001898 state, self->string, status);
1899
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001900 if (status == 0 || state->ptr == state->start)
1901 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001902 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001903 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001904
1905 return match;
1906}
1907
1908
1909static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001910scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001911{
1912 SRE_STATE* state = &self->state;
1913 PyObject* match;
1914 int status;
1915
1916 state->ptr = state->start;
1917
1918 if (state->charsize == 1) {
1919 status = sre_search(state, PatternObject_GetCode(self->pattern));
1920 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001921#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001922 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001923#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001924 }
1925
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001926 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001927 state, self->string, status);
1928
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001929 if (status == 0 || state->ptr == state->start)
1930 state->start = (void*) ((char*) state->ptr + state->charsize);
1931 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001932 state->start = state->ptr;
1933
1934 return match;
1935}
1936
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001937static PyMethodDef scanner_methods[] = {
1938 {"match", (PyCFunction) scanner_match, 0},
1939 {"search", (PyCFunction) scanner_search, 0},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001940 {NULL, NULL}
1941};
1942
1943static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001944scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001945{
1946 PyObject* res;
1947
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001948 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001949 if (res)
1950 return res;
1951
1952 PyErr_Clear();
1953
1954 /* attributes */
1955 if (!strcmp(name, "pattern")) {
1956 Py_INCREF(self->pattern);
1957 return self->pattern;
1958 }
1959
1960 PyErr_SetString(PyExc_AttributeError, name);
1961 return NULL;
1962}
1963
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001964statichere PyTypeObject Scanner_Type = {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001965 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001966 0, "SRE_Scanner",
1967 sizeof(ScannerObject), /* size of basic object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001968 0,
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001969 (destructor)scanner_dealloc, /*tp_dealloc*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001970 0, /*tp_print*/
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001971 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001972};
1973
Guido van Rossumb700df92000-03-31 14:59:30 +00001974static PyMethodDef _functions[] = {
1975 {"compile", _compile, 1},
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001976 {"getcodesize", sre_codesize, 1},
Fredrik Lundhb389df32000-06-29 12:48:37 +00001977 {"getlower", sre_getlower, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001978 {NULL, NULL}
1979};
1980
1981void
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001982#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00001983__declspec(dllexport)
1984#endif
1985init_sre()
1986{
1987 /* Patch object types */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001988 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001989 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001990
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001991 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00001992}
1993
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001994#endif /* !defined(SRE_RECURSIVE) */