blob: 46fe4ed67952db9f218a242db93236979eb2cef1 [file] [log] [blame]
/* -*- Mode: C; tab-width: 4 -*-
 *
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 99-10-24 fl created (based on existing template matcher code)
 * 99-11-13 fl added categories, branching, and more (0.2)
 * 99-11-16 fl some tweaks to compile on non-Windows platforms
 * 99-12-18 fl non-literals, generic maximizing repeat (0.3)
 * 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
 * 00-03-06 fl first alpha, sort of (0.5)
 * 00-03-14 fl removed most compatibility stuff (0.6)
 * 00-05-10 fl towards third alpha (0.8.2)
 * 00-05-13 fl added experimental scanner stuff (0.8.3)
 * 00-05-27 fl final bug hunt (0.8.4)
 * 00-06-21 fl less bugs, more taste (0.8.5)
 * 00-06-25 fl major changes to better deal with nested repeats (0.9)
 * 00-06-28 fl fixed findall (0.9.1)
 * 00-06-29 fl fixed split, added more scanner features (0.9.2)
 * 00-06-30 fl tuning, fast search (0.9.3)
 * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
31
32#ifndef SRE_RECURSIVE
33
/* version/copyright banner embedded in the object file; external linkage
   is kept as-is in case other translation units refer to it */
char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000035
36#include "Python.h"
37
38#include "sre.h"
39
Guido van Rossumb700df92000-03-31 14:59:30 +000040#if defined(HAVE_LIMITS_H)
41#include <limits.h>
42#else
43#define INT_MAX 2147483647
44#endif
45
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000046#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000047
/* name of this module, minus the leading underscore */
#define MODULE "sre"

/* defining this one enables tracing (see TRACE below) */
#undef DEBUG

#if PY_VERSION_HEX >= 0x01060000
/* defining this enables unicode support (default under 1.6) */
#define HAVE_UNICODE
#endif

/* optional features */
#define USE_FAST_SEARCH

/* LOCAL(type) declares a file-local helper returning <type> */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif

/* error codes (parenthesized so they are safe inside larger expressions) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE takes a parenthesized printf argument list, e.g.
   TRACE(("value %d\n", v)); it expands to nothing unless DEBUG is
   defined */
#if defined(DEBUG)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif

/* offset of ptr from the start of the target string, in SRE_CHAR units;
   expects a variable named "state" to be in scope at the point of use */
#define PTR(ptr) ((SRE_CHAR*) (ptr) - (SRE_CHAR*) state->beginning)
Guido van Rossumb700df92000-03-31 14:59:30 +000081
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000082/* -------------------------------------------------------------------- */
83/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000084
/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored per character in sre_char_info */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* classification flags for character codes 0..127, 16 entries per row */
static char sre_char_info[128] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /* 0x60 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
100
/* lower-case mapping for character codes 0..127: 'A'..'Z' (65..90) map
   to 'a'..'z' (97..122), every other code maps to itself */
static char sre_char_lower[128] = {
    /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /* 0x40 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
               93, 94, 95,
    /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
               124, 125, 126, 127
};

/* return the lower-case form of ch using the table above; codes of 128
   and above are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? (unsigned int) sre_char_lower[ch] : ch);
}
115
/* table-driven predicates: each yields the (non-zero) mask bit when the
   flag is set for ch in sre_char_info, and 0 otherwise; codes of 128 and
   above always yield 0.  note that ch is evaluated more than once, so
   arguments must not have side effects */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000126
/* locale-specific character predicates */

/* return the lower-case form of ch according to the C library's
   tolower(); codes of 256 and above are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int) tolower((int) (ch)) : ch);
}
/* <ctype.h>-based predicates; codes of 256 and above never match.  the
   linebreak test only recognizes '\n'.  ch is evaluated more than once */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
138
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)
/* return the lower-case form of ch as given by Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}
#define SRE_UNI_TO_LOWER(ch) Py_UNICODE_TOLOWER((Py_UNICODE)(ch))
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
/* NOTE(review): this still goes through <ctype.h> isalnum() and rejects
   everything at 256 and above; presumably a stand-in until a unicode
   alnum predicate is available -- confirm */
#define SRE_UNI_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
/* word test is built on the unicode alnum predicate above (it previously
   used the ASCII-only SRE_IS_ALNUM, which disagreed with
   SRE_UNI_IS_ALNUM for codes 128..255) */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
#endif
153
Guido van Rossumb700df92000-03-31 14:59:30 +0000154LOCAL(int)
155sre_category(SRE_CODE category, unsigned int ch)
156{
157 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000158
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000159 case SRE_CATEGORY_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000160 return SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000161 case SRE_CATEGORY_NOT_DIGIT:
Guido van Rossumb700df92000-03-31 14:59:30 +0000162 return !SRE_IS_DIGIT(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000163 case SRE_CATEGORY_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000164 return SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000165 case SRE_CATEGORY_NOT_SPACE:
Guido van Rossumb700df92000-03-31 14:59:30 +0000166 return !SRE_IS_SPACE(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000167 case SRE_CATEGORY_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000168 return SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000169 case SRE_CATEGORY_NOT_WORD:
Guido van Rossumb700df92000-03-31 14:59:30 +0000170 return !SRE_IS_WORD(ch);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000171 case SRE_CATEGORY_LINEBREAK:
172 return SRE_IS_LINEBREAK(ch);
173 case SRE_CATEGORY_NOT_LINEBREAK:
174 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000175
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000176 case SRE_CATEGORY_LOC_WORD:
177 return SRE_LOC_IS_WORD(ch);
178 case SRE_CATEGORY_LOC_NOT_WORD:
179 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000180
181#if defined(HAVE_UNICODE)
182 case SRE_CATEGORY_UNI_DIGIT:
183 return SRE_UNI_IS_DIGIT(ch);
184 case SRE_CATEGORY_UNI_NOT_DIGIT:
185 return !SRE_UNI_IS_DIGIT(ch);
186 case SRE_CATEGORY_UNI_SPACE:
187 return SRE_UNI_IS_SPACE(ch);
188 case SRE_CATEGORY_UNI_NOT_SPACE:
189 return !SRE_UNI_IS_SPACE(ch);
190 case SRE_CATEGORY_UNI_WORD:
191 return SRE_UNI_IS_WORD(ch);
192 case SRE_CATEGORY_UNI_NOT_WORD:
193 return !SRE_UNI_IS_WORD(ch);
194 case SRE_CATEGORY_UNI_LINEBREAK:
195 return SRE_UNI_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
197 return !SRE_UNI_IS_LINEBREAK(ch);
198#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000199 }
200 return 0;
201}
202
203/* helpers */
204
205LOCAL(int)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000206stack_free(SRE_STATE* state)
Guido van Rossumb700df92000-03-31 14:59:30 +0000207{
208 if (state->stack) {
209 TRACE(("release stack\n"));
210 free(state->stack);
211 state->stack = NULL;
212 }
213 state->stacksize = 0;
214 return 0;
215}
216
217static int /* shouldn't be LOCAL */
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000218stack_extend(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000219{
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000220 SRE_STACK* stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000221 int stacksize;
222
223 /* grow the stack to a suitable size; we need at least lo entries,
224 at most hi entries. if for some reason hi is lower than lo, lo
225 wins */
226
227 stacksize = state->stacksize;
228
229 if (stacksize == 0) {
230 /* create new stack */
231 stacksize = 512;
232 if (stacksize < lo)
233 stacksize = lo;
234 else if (stacksize > hi)
235 stacksize = hi;
236 TRACE(("allocate stack %d\n", stacksize));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000237 stack = malloc(sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000238 } else {
239 /* grow the stack (typically by a factor of two) */
240 while (stacksize < lo)
241 stacksize = 2 * stacksize;
242 /* FIXME: <fl> could trim size if it's larger than lo, and
243 much larger than hi */
244 TRACE(("grow stack to %d\n", stacksize));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000245 stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
Guido van Rossumb700df92000-03-31 14:59:30 +0000246 }
247
248 if (!stack) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000249 stack_free(state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000250 return SRE_ERROR_MEMORY;
251 }
252
253 state->stack = stack;
254 state->stacksize = stacksize;
255
256 return 0;
257}
258
/* generate 8-bit version */

/* the engine section further down is written against these names; they
   select the character type and the names of the generated functions */
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_MEMBER sre_member
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

/* pull in this file once more with SRE_RECURSIVE defined, so that only
   the engine section is compiled, using the 8-bit settings above */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_MEMBER
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_MEMBER sre_umember
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000287
288#endif /* SRE_RECURSIVE */
289
/* -------------------------------------------------------------------- */
/* String matching engine */

/* the following section is compiled twice, with different character
   settings */
295
296LOCAL(int)
297SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
298{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000299 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000300
301 int this, that;
302
303 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000304
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000305 case SRE_AT_BEGINNING:
Guido van Rossum29530882000-04-10 17:06:55 +0000306 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000307
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000308 case SRE_AT_BEGINNING_LINE:
309 return ((void*) ptr == state->beginning ||
310 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000311
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000312 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000313 return (((void*) (ptr+1) == state->end &&
314 SRE_IS_LINEBREAK((int) ptr[0])) ||
315 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000316
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000317 case SRE_AT_END_LINE:
318 return ((void*) ptr == state->end ||
319 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000320
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000321 case SRE_AT_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000322 if (state->beginning == state->end)
323 return 0;
324 that = ((void*) ptr > state->beginning) ?
325 SRE_IS_WORD((int) ptr[-1]) : 0;
326 this = ((void*) ptr < state->end) ?
327 SRE_IS_WORD((int) ptr[0]) : 0;
328 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000329
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000330 case SRE_AT_NON_BOUNDARY:
Guido van Rossumb700df92000-03-31 14:59:30 +0000331 if (state->beginning == state->end)
332 return 0;
333 that = ((void*) ptr > state->beginning) ?
334 SRE_IS_WORD((int) ptr[-1]) : 0;
335 this = ((void*) ptr < state->end) ?
336 SRE_IS_WORD((int) ptr[0]) : 0;
337 return this == that;
338 }
339
340 return 0;
341}
342
343LOCAL(int)
Fredrik Lundh0640e112000-06-30 13:55:15 +0000344SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000345{
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000346 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000347
348 int ok = 1;
349
350 for (;;) {
351 switch (*set++) {
352
353 case SRE_OP_NEGATE:
354 ok = !ok;
355 break;
356
357 case SRE_OP_FAILURE:
358 return !ok;
359
360 case SRE_OP_LITERAL:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000361 if (ch == set[0])
Guido van Rossumb700df92000-03-31 14:59:30 +0000362 return ok;
363 set++;
364 break;
365
366 case SRE_OP_RANGE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000367 if (set[0] <= ch && ch <= set[1])
Guido van Rossumb700df92000-03-31 14:59:30 +0000368 return ok;
369 set += 2;
370 break;
371
372 case SRE_OP_CATEGORY:
373 if (sre_category(set[0], (int) ch))
374 return ok;
375 set += 1;
376 break;
377
378 default:
Fredrik Lundh80946112000-06-29 18:03:25 +0000379 /* internal error -- there's not much we can do about it
380 here, so let's just pretend it didn't match... */
Guido van Rossumb700df92000-03-31 14:59:30 +0000381 return 0;
382 }
383 }
384}
385
386LOCAL(int)
387SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
388{
389 /* check if string matches the given pattern. returns -1 for
390 error, 0 for failure, and 1 for success */
391
392 SRE_CHAR* end = state->end;
393 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000394 int stack;
Guido van Rossumb700df92000-03-31 14:59:30 +0000395 int stackbase;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000396 int lastmark;
Guido van Rossumb700df92000-03-31 14:59:30 +0000397 int i, count;
398
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000399 /* FIXME: this is a hack! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000400 void* mark_copy[SRE_MARK_SIZE];
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000401 void* mark = NULL;
402
403 TRACE(("%8d: enter\n", PTR(ptr)));
404
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000405 if (pattern[0] == SRE_OP_INFO) {
406 /* optimization info block */
407 /* args: <1=skip> <2=flags> <3=min> ... */
408 if (pattern[3] && (end - ptr) < pattern[3]) {
409 TRACE(("reject (got %d chars, need %d)\n",
410 (end - ptr), pattern[3]));
411 return 0;
412 }
413 pattern += pattern[1] + 1;
414 }
415
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000416 stackbase = stack = state->stackbase;
417 lastmark = state->lastmark;
418
419 retry:
Guido van Rossumb700df92000-03-31 14:59:30 +0000420
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000421 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000422
423 switch (*pattern++) {
424
425 case SRE_OP_FAILURE:
426 /* immediate failure */
427 TRACE(("%8d: failure\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000428 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000429
430 case SRE_OP_SUCCESS:
431 /* end of pattern */
432 TRACE(("%8d: success\n", PTR(ptr)));
433 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000434 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000435
436 case SRE_OP_AT:
437 /* match at given position */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000438 /* args: <at> */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000439 TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
Guido van Rossumb700df92000-03-31 14:59:30 +0000440 if (!SRE_AT(state, ptr, *pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000441 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000442 pattern++;
443 break;
444
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000445 case SRE_OP_CATEGORY:
446 /* match at given category */
447 /* args: <category> */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000448 TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
449 *ptr, *pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000450 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
451 goto failure;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000452 TRACE(("%8d: category ok\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000453 pattern++;
454 ptr++;
455 break;
456
Guido van Rossumb700df92000-03-31 14:59:30 +0000457 case SRE_OP_LITERAL:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000458 /* match literal string */
Guido van Rossumb700df92000-03-31 14:59:30 +0000459 /* args: <code> */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000460 TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
461 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000462 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000463 pattern++;
464 ptr++;
465 break;
466
467 case SRE_OP_NOT_LITERAL:
468 /* match anything that is not literal character */
469 /* args: <code> */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000470 TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
471 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000472 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000473 pattern++;
474 ptr++;
475 break;
476
477 case SRE_OP_ANY:
478 /* match anything */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000479 TRACE(("%8d: anything\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000480 if (ptr >= end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000481 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000482 ptr++;
483 break;
484
485 case SRE_OP_IN:
486 /* match set member (or non_member) */
487 /* args: <skip> <set> */
488 TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
489 if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000490 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000491 pattern += pattern[0];
492 ptr++;
493 break;
494
495 case SRE_OP_GROUP:
496 /* match backreference */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000497 TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000498 i = pattern[0];
499 {
Guido van Rossumb700df92000-03-31 14:59:30 +0000500 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
501 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
502 if (!p || !e || e < p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000503 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000504 while (p < e) {
505 if (ptr >= end || *ptr != *p)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000506 goto failure;
507 p++; ptr++;
508 }
509 }
510 pattern++;
511 break;
512
513 case SRE_OP_GROUP_IGNORE:
514 /* match backreference */
515 TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
516 i = pattern[0];
517 {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000518 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
519 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000520 if (!p || !e || e < p)
521 goto failure;
522 while (p < e) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000523 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000524 state->lower(*ptr) != state->lower(*p))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000525 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000526 p++; ptr++;
527 }
528 }
529 pattern++;
530 break;
531
532 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000533 TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000534 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000535 state->lower(*ptr) != state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000536 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000537 pattern++;
538 ptr++;
539 break;
540
541 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh0640e112000-06-30 13:55:15 +0000542 TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000543 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000544 state->lower(*ptr) == state->lower(*pattern))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000545 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000546 pattern++;
547 ptr++;
548 break;
549
550 case SRE_OP_IN_IGNORE:
551 TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
552 if (ptr >= end
Fredrik Lundh0640e112000-06-30 13:55:15 +0000553 || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000554 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000555 pattern += pattern[0];
556 ptr++;
557 break;
558
559 case SRE_OP_MARK:
560 /* set mark */
561 /* args: <mark> */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000562 TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
563 if (state->lastmark < pattern[0])
564 state->lastmark = pattern[0];
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000565 if (!mark) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000566 mark = mark_copy;
567 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000568 }
569 state->mark[pattern[0]] = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000570 pattern++;
571 break;
572
573 case SRE_OP_JUMP:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000574 case SRE_OP_INFO:
Guido van Rossumb700df92000-03-31 14:59:30 +0000575 /* jump forward */
576 /* args: <skip> */
577 TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
578 pattern += pattern[0];
579 break;
580
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000581 case SRE_OP_ASSERT:
582 /* assert subpattern */
Guido van Rossumb700df92000-03-31 14:59:30 +0000583 /* args: <skip> <pattern> */
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000584 TRACE(("%8d: assert subpattern\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +0000585 state->ptr = ptr;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000586 i = SRE_MATCH(state, pattern + 1);
587 if (i < 0)
588 return i;
589 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000590 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000591 pattern += pattern[0];
Guido van Rossumb700df92000-03-31 14:59:30 +0000592 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000593
594 case SRE_OP_ASSERT_NOT:
595 /* assert not subpattern */
596 /* args: <skip> <pattern> */
597 TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
598 state->ptr = ptr;
599 i = SRE_MATCH(state, pattern + 1);
600 if (i < 0)
601 return i;
602 if (i)
603 goto failure;
604 pattern += pattern[0];
605 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000606
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000607#if 0
Guido van Rossumb700df92000-03-31 14:59:30 +0000608 case SRE_OP_MAX_REPEAT_ONE:
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000609 /* match repeated sequence (maximizing regexp) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000610
611 /* this operator only works if the repeated item is
612 exactly one character wide, and we're not already
613 collecting backtracking points. for other cases,
614 use the MAX_REPEAT operator instead */
615
Guido van Rossumb700df92000-03-31 14:59:30 +0000616 /* args: <skip> <min> <max> <step> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000617 TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
618 pattern[1], pattern[2]));
619
620 count = 0;
621
622 if (pattern[3] == SRE_OP_ANY) {
623 /* repeated wildcard. skip to the end of the target
624 string, and backtrack from there */
625 /* FIXME: must look for line endings */
626 if (ptr + pattern[1] > end)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000627 goto failure; /* cannot match */
Guido van Rossumb700df92000-03-31 14:59:30 +0000628 count = pattern[2];
629 if (count > end - ptr)
630 count = end - ptr;
631 ptr += count;
632
633 } else if (pattern[3] == SRE_OP_LITERAL) {
634 /* repeated literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000635 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000636 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000637 if (ptr >= end || (SRE_CODE) ptr[0] != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000638 break;
639 ptr++;
640 count++;
641 }
642
643 } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
644 /* repeated literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000645 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000646 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000647 if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000648 break;
649 ptr++;
650 count++;
651 }
652
653 } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
654 /* repeated non-literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000655 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000656 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000657 if (ptr >= end || (SRE_CODE) ptr[0] == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000658 break;
659 ptr++;
660 count++;
661 }
662
663 } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
664 /* repeated non-literal */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000665 SRE_CODE chr = pattern[4];
Guido van Rossumb700df92000-03-31 14:59:30 +0000666 while (count < (int) pattern[2]) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000667 if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
Guido van Rossumb700df92000-03-31 14:59:30 +0000668 break;
669 ptr++;
670 count++;
671 }
672
673 } else if (pattern[3] == SRE_OP_IN) {
674 /* repeated set */
675 while (count < (int) pattern[2]) {
676 if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
677 break;
678 ptr++;
679 count++;
680 }
681
682 } else {
683 /* repeated single character pattern */
684 state->ptr = ptr;
685 while (count < (int) pattern[2]) {
686 i = SRE_MATCH(state, pattern + 3);
687 if (i < 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000688 return i;
689 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000690 break;
691 count++;
692 }
693 state->ptr = ptr;
694 ptr += count;
695 }
696
697 /* when we arrive here, count contains the number of
698 matches, and ptr points to the tail of the target
699 string. check if the rest of the pattern matches, and
700 backtrack if not. */
701
Guido van Rossumb700df92000-03-31 14:59:30 +0000702 TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
703
704 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000705 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000706
707 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
708 /* tail is empty. we're finished */
709 TRACE(("%8d: tail is empty\n", PTR(ptr)));
710 state->ptr = ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000711 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000712
713 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000714 /* tail starts with a literal. skip positions where
715 the rest of the pattern cannot possibly match */
Fredrik Lundh0640e112000-06-30 13:55:15 +0000716 SRE_CODE chr = pattern[pattern[0]+1];
Guido van Rossumb700df92000-03-31 14:59:30 +0000717 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
718 for (;;) {
719 TRACE(("%8d: scan for tail match\n", PTR(ptr)));
720 while (count >= (int) pattern[1] &&
721 (ptr >= end || *ptr != chr)) {
722 ptr--;
723 count--;
724 }
725 TRACE(("%8d: check tail\n", PTR(ptr)));
726 if (count < (int) pattern[1])
727 break;
728 state->ptr = ptr;
729 i = SRE_MATCH(state, pattern + pattern[0]);
730 if (i > 0) {
731 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000732 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000733 }
734 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
735 ptr--;
736 count--;
737 }
738
739 } else {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000740 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +0000741 TRACE(("%8d: tail is pattern\n", PTR(ptr)));
742 while (count >= (int) pattern[1]) {
743 state->ptr = ptr;
744 i = SRE_MATCH(state, pattern + pattern[0]);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000745 if (i < 0)
746 return i;
747 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000748 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000749 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000750 }
751 TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
752 ptr--;
753 count--;
754 }
755 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000756 goto failure;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000757#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000758
759 case SRE_OP_MAX_REPEAT:
760 /* match repeated sequence (maximizing regexp). repeated
761 group should end with a MAX_UNTIL code */
762
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000763 /* args: <skip> <min> <max> <item> */
764
765 TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
Guido van Rossumb700df92000-03-31 14:59:30 +0000766 pattern[1], pattern[2]));
767
768 count = 0;
769 state->ptr = ptr;
770
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000771 /* match minimum number of items */
772 while (count < (int) pattern[1]) {
773 i = SRE_MATCH(state, pattern + 3);
774 if (i < 0)
775 return i;
776 if (!i)
777 goto failure;
778 if (state->ptr == ptr) {
779 /* if the match was successful but empty, set the
780 count to max and terminate the scanning loop */
781 count = (int) pattern[2];
782 break;
783 }
784 count++;
785 ptr = state->ptr;
786 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000787
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000788 TRACE(("%8d: found %d leading items\n", PTR(ptr), count));
Guido van Rossumb700df92000-03-31 14:59:30 +0000789
790 if (count < (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000791 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000792
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000793 /* match maximum number of items, pushing alternate end
794 points to the stack */
Guido van Rossumb700df92000-03-31 14:59:30 +0000795
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000796 while (pattern[2] == 32767 || count < (int) pattern[2]) {
797 state->stackbase = stack;
798 i = SRE_MATCH(state, pattern + 3);
799 state->stackbase = stackbase; /* rewind */
800 if (i < 0)
801 return i;
802 if (!i)
803 break;
804 if (state->ptr == ptr) {
805 count = (int) pattern[2];
806 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000807 }
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000808 /* this position was valid; add it to the retry
809 stack */
810 if (stack >= state->stacksize) {
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000811 i = stack_extend(state, stack + 1,
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000812 stackbase + pattern[2]);
813 if (i < 0)
814 return i; /* out of memory */
815 }
816 TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
817 state->stack[stack].ptr = ptr;
818 state->stack[stack].pattern = pattern + pattern[0];
819 stack++;
820 /* move forward */
821 ptr = state->ptr;
822 count++;
Guido van Rossumb700df92000-03-31 14:59:30 +0000823 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000824
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000825 /* when we get here, count is the number of successful
826 matches, and ptr points to the tail. */
Guido van Rossumb700df92000-03-31 14:59:30 +0000827
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000828 TRACE(("%8d: skip +%d\n", PTR(ptr), pattern[0]));
829
830 pattern += pattern[0];
831 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000832
833 case SRE_OP_MIN_REPEAT:
834 /* match repeated sequence (minimizing regexp) */
835 TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
836 pattern[1], pattern[2]));
837 count = 0;
838 state->ptr = ptr;
839 /* match minimum number of items */
840 while (count < (int) pattern[1]) {
841 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000842 if (i < 0)
843 return i;
844 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000845 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000846 count++;
847 }
848 /* move forward until the tail matches. */
849 while (count <= (int) pattern[2]) {
850 ptr = state->ptr;
851 i = SRE_MATCH(state, pattern + pattern[0]);
852 if (i > 0) {
853 TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000854 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000855 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000856 state->ptr = ptr; /* backtrack */
857 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000858 if (i < 0)
859 return i;
860 if (!i)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000861 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000862 count++;
863 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000864 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000865
Guido van Rossumb700df92000-03-31 14:59:30 +0000866 case SRE_OP_BRANCH:
867 /* match one of several subpatterns */
868 /* format: <branch> <size> <head> ... <null> <tail> */
869 TRACE(("%8d: branch\n", PTR(ptr)));
870 while (*pattern) {
871 if (pattern[1] != SRE_OP_LITERAL ||
Fredrik Lundh0640e112000-06-30 13:55:15 +0000872 (ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000873 TRACE(("%8d: branch check\n", PTR(ptr)));
874 state->ptr = ptr;
875 i = SRE_MATCH(state, pattern + 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000876 if (i < 0)
877 return i;
878 if (i) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000879 TRACE(("%8d: branch succeeded\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000880 goto success;
Guido van Rossumb700df92000-03-31 14:59:30 +0000881 }
882 }
883 pattern += *pattern;
884 }
885 TRACE(("%8d: branch failed\n", PTR(ptr)));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000886 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000887
888 case SRE_OP_REPEAT:
889 /* TEMPLATE: match repeated sequence (no backtracking) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000890 /* args: <skip> <min> <max> */
Guido van Rossumb700df92000-03-31 14:59:30 +0000891 TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
892 count = 0;
893 state->ptr = ptr;
894 while (count < (int) pattern[2]) {
895 i = SRE_MATCH(state, pattern + 3);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000896 if (i < 0)
897 return i;
898 if (!i)
Guido van Rossumb700df92000-03-31 14:59:30 +0000899 break;
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000900 if (state->ptr == ptr) {
901 count = (int) pattern[2];
902 break;
903 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000904 count++;
905 }
906 if (count <= (int) pattern[1])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000907 goto failure;
Guido van Rossumb700df92000-03-31 14:59:30 +0000908 TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
909 pattern += pattern[0];
910 ptr = state->ptr;
911 break;
912
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000913 default:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000914 TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
Guido van Rossumb700df92000-03-31 14:59:30 +0000915 return SRE_ERROR_ILLEGAL;
916 }
917 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000918
919 failure:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000920 if (stack-- > stackbase) {
921 ptr = state->stack[stack].ptr;
922 pattern = state->stack[stack].pattern;
923 TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
924 goto retry;
925 }
926 TRACE(("%8d: leave (failure)\n", PTR(ptr)));
927 state->stackbase = stackbase;
928 state->lastmark = lastmark;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000929 if (mark)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000930 memcpy(state->mark, mark, state->lastmark*sizeof(void*));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000931 return 0;
932
933 success:
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000934 TRACE(("%8d: leave (success)\n", PTR(ptr)));
935 state->stackbase = stackbase;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000936 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000937}
938
939LOCAL(int)
940SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
941{
942 SRE_CHAR* ptr = state->start;
943 SRE_CHAR* end = state->end;
944 int status = 0;
Fredrik Lundh80946112000-06-29 18:03:25 +0000945 int prefix_len = 0;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000946 SRE_CODE* prefix;
947 SRE_CODE* overlap;
948 int literal = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000949
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000950 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000951 /* optimization info block */
952 /* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
953
954 if (pattern[3] > 0) {
955 /* adjust end point (but make sure we leave at least one
956 character in there) */
957 end -= pattern[3]-1;
958 if (end <= ptr)
959 end = ptr+1;
960 }
961
962 literal = pattern[2];
963
964 prefix = pattern + 6;
965 prefix_len = pattern[5];
966
967 overlap = prefix + prefix_len - 1;
968
969 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000970 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000971
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000972#if defined(USE_FAST_SEARCH)
973 if (prefix_len > 1) {
974 /* pattern starts with a known prefix. use the overlap
975 table to skip forward as fast as we possibly can */
976 int i = 0;
977 end = state->end;
978 while (ptr < end) {
979 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +0000980 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000981 if (!i)
982 break;
983 else
984 i = overlap[i];
985 } else {
986 if (++i == prefix_len) {
987 /* found a potential match */
988 TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
989 state->start = ptr - prefix_len + 1;
990 state->ptr = ptr + 1;
991 if (literal)
992 return 1; /* all of it */
993 status = SRE_MATCH(state, pattern + 2*prefix_len);
994 if (status != 0)
995 return status;
996 /* close but no cigar -- try again */
997 i = overlap[i];
998 }
999 break;
1000 }
1001
1002 }
1003 ptr++;
1004 }
1005 return 0;
1006 }
1007#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001008
Guido van Rossumb700df92000-03-31 14:59:30 +00001009 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001010 /* pattern starts with a literal character. this is used for
1011 short prefixes, and if fast search is disabled*/
Fredrik Lundh0640e112000-06-30 13:55:15 +00001012 SRE_CODE chr = pattern[1];
Guido van Rossumb700df92000-03-31 14:59:30 +00001013 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001014 while (ptr < end && (SRE_CODE) ptr[0] != chr)
Guido van Rossumb700df92000-03-31 14:59:30 +00001015 ptr++;
1016 if (ptr == end)
1017 return 0;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001018 TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001019 state->start = ptr;
1020 state->ptr = ++ptr;
1021 status = SRE_MATCH(state, pattern + 2);
1022 if (status != 0)
1023 break;
1024 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001025 } else
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001026 /* general case */
Guido van Rossumb700df92000-03-31 14:59:30 +00001027 while (ptr <= end) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001028 TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
Guido van Rossumb700df92000-03-31 14:59:30 +00001029 state->start = state->ptr = ptr++;
1030 status = SRE_MATCH(state, pattern);
1031 if (status != 0)
1032 break;
1033 }
1034
1035 return status;
1036}
1037
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001038#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001039
1040/* -------------------------------------------------------------------- */
1041/* factories and destructors */
1042
1043/* see sre.h for object declarations */
1044
1045staticforward PyTypeObject Pattern_Type;
1046staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001047staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001048
1049static PyObject *
1050_compile(PyObject* self_, PyObject* args)
1051{
1052 /* "compile" pattern descriptor to pattern object */
1053
1054 PatternObject* self;
1055
1056 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001057 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001058 PyObject* code;
1059 int groups = 0;
1060 PyObject* groupindex = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001061 if (!PyArg_ParseTuple(args, "OiO!|iO", &pattern, &flags,
1062 &PyString_Type, &code,
1063 &groups, &groupindex))
Guido van Rossumb700df92000-03-31 14:59:30 +00001064 return NULL;
1065
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001066 self = PyObject_NEW(PatternObject, &Pattern_Type);
Guido van Rossumb700df92000-03-31 14:59:30 +00001067 if (self == NULL)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001068
Guido van Rossumb700df92000-03-31 14:59:30 +00001069 return NULL;
1070
1071 Py_INCREF(pattern);
1072 self->pattern = pattern;
1073
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001074 self->flags = flags;
1075
Guido van Rossumb700df92000-03-31 14:59:30 +00001076 Py_INCREF(code);
1077 self->code = code;
1078
1079 self->groups = groups;
1080
1081 Py_XINCREF(groupindex);
1082 self->groupindex = groupindex;
1083
1084 return (PyObject*) self;
1085}
1086
1087static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001088sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001089{
1090 return Py_BuildValue("i", sizeof(SRE_CODE));
1091}
1092
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001093static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001094sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001095{
1096 int character, flags;
1097 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1098 return NULL;
1099 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001100 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001101#if defined(HAVE_UNICODE)
1102 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001103 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001104#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001105 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001106}
1107
Guido van Rossumb700df92000-03-31 14:59:30 +00001108LOCAL(PyObject*)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001109state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001110{
1111 /* prepare state object */
1112
1113 PyBufferProcs *buffer;
1114 int i, count;
1115 void* ptr;
1116
1117 PyObject* string;
1118 int start = 0;
1119 int end = INT_MAX;
1120 if (!PyArg_ParseTuple(args, "O|ii", &string, &start, &end))
1121 return NULL;
1122
1123 /* get pointer to string buffer */
1124 buffer = string->ob_type->tp_as_buffer;
1125 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1126 buffer->bf_getsegcount(string, NULL) != 1) {
1127 PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
1128 return NULL;
1129 }
1130
1131 /* determine buffer size */
1132 count = buffer->bf_getreadbuffer(string, 0, &ptr);
1133 if (count < 0) {
1134 /* sanity check */
1135 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1136 return NULL;
1137 }
1138
1139 /* determine character size */
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001140#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001141 state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001142#else
1143 state->charsize = 1;
1144#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001145
1146 count /= state->charsize;
1147
1148 /* adjust boundaries */
1149 if (start < 0)
1150 start = 0;
1151 else if (start > count)
1152 start = count;
1153
1154 if (end < 0)
1155 end = 0;
1156 else if (end > count)
1157 end = count;
1158
1159 state->beginning = ptr;
1160
1161 state->start = (void*) ((char*) ptr + start * state->charsize);
1162 state->end = (void*) ((char*) ptr + end * state->charsize);
1163
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001164 state->lastmark = 0;
1165
Guido van Rossumb700df92000-03-31 14:59:30 +00001166 /* FIXME: dynamic! */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001167 for (i = 0; i < SRE_MARK_SIZE; i++)
Guido van Rossumb700df92000-03-31 14:59:30 +00001168 state->mark[i] = NULL;
1169
1170 state->stack = NULL;
1171 state->stackbase = 0;
1172 state->stacksize = 0;
1173
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001174 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001175 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001176#if defined(HAVE_UNICODE)
1177 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001178 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001179#endif
1180 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001181 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001182
Guido van Rossumb700df92000-03-31 14:59:30 +00001183 return string;
1184}
1185
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001186LOCAL(void)
1187state_fini(SRE_STATE* state)
1188{
1189 stack_free(state);
1190}
1191
1192LOCAL(PyObject*)
1193state_getslice(SRE_STATE* state, int index, PyObject* string)
1194{
1195 index = (index - 1) * 2;
1196
1197 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
1198 Py_INCREF(Py_None);
1199 return Py_None;
1200 }
1201
1202 return PySequence_GetSlice(
1203 string,
1204 ((char*)state->mark[index] - (char*)state->beginning) /
1205 state->charsize,
1206 ((char*)state->mark[index+1] - (char*)state->beginning) /
1207 state->charsize
1208 );
1209}
1210
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001211static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001212pattern_new_match(PatternObject* pattern, SRE_STATE* state,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001213 PyObject* string, int status)
1214{
1215 /* create match object (from state object) */
1216
1217 MatchObject* match;
1218 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001219 char* base;
1220 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001221
1222 if (status > 0) {
1223
1224 /* create match object (with room for extra group marks) */
1225 match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2*pattern->groups);
1226 if (match == NULL)
1227 return NULL;
1228
1229 Py_INCREF(pattern);
1230 match->pattern = pattern;
1231
1232 Py_INCREF(string);
1233 match->string = string;
1234
1235 match->groups = pattern->groups+1;
1236
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001237 base = (char*) state->beginning;
1238 n = state->charsize;
1239
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001240 /* group zero */
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001241 match->mark[0] = ((char*) state->start - base) / n;
1242 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001243
1244 /* fill in the rest of the groups */
1245 for (i = j = 0; i < pattern->groups; i++, j+=2)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001246 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1247 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1248 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001249 } else
1250 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1251
1252 return (PyObject*) match;
1253
1254 } else if (status < 0) {
1255
1256 /* internal error */
1257 PyErr_SetString(
1258 PyExc_RuntimeError, "internal error in regular expression engine"
1259 );
1260 return NULL;
1261
1262 }
1263
1264 Py_INCREF(Py_None);
1265 return Py_None;
1266}
1267
1268static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001269pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001270{
1271 /* create search state object */
1272
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001273 ScannerObject* self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001274 PyObject* string;
1275
1276 /* create match object (with room for extra group marks) */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001277 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001278 if (self == NULL)
1279 return NULL;
1280
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001281 string = state_init(&self->state, pattern, args);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001282 if (!string) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001283 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001284 return NULL;
1285 }
1286
1287 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001288 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001289
1290 Py_INCREF(string);
1291 self->string = string;
1292
1293 return (PyObject*) self;
1294}
1295
Guido van Rossumb700df92000-03-31 14:59:30 +00001296static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001297pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001298{
1299 Py_XDECREF(self->code);
1300 Py_XDECREF(self->pattern);
1301 Py_XDECREF(self->groupindex);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001302 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001303}
1304
1305static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001306pattern_match(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001307{
1308 SRE_STATE state;
1309 PyObject* string;
1310 int status;
1311
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001312 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001313 if (!string)
1314 return NULL;
1315
1316 state.ptr = state.start;
1317
1318 if (state.charsize == 1) {
1319 status = sre_match(&state, PatternObject_GetCode(self));
1320 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001321#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001322 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001323#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001324 }
1325
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001326 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001327
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001328 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001329}
1330
1331static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001332pattern_search(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001333{
1334 SRE_STATE state;
1335 PyObject* string;
1336 int status;
1337
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001338 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001339 if (!string)
1340 return NULL;
1341
1342 if (state.charsize == 1) {
1343 status = sre_search(&state, PatternObject_GetCode(self));
1344 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001345#if defined(HAVE_UNICODE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001346 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001347#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001348 }
1349
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001350 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001351
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001352 return pattern_new_match(self, &state, string, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001353}
1354
1355static PyObject*
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001356call(char* function, PyObject* args)
1357{
1358 PyObject* name;
1359 PyObject* module;
1360 PyObject* func;
1361 PyObject* result;
1362
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001363 name = PyString_FromString(MODULE);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001364 if (!name)
1365 return NULL;
1366 module = PyImport_Import(name);
1367 Py_DECREF(name);
1368 if (!module)
1369 return NULL;
1370 func = PyObject_GetAttrString(module, function);
1371 Py_DECREF(module);
1372 if (!func)
1373 return NULL;
1374 result = PyObject_CallObject(func, args);
1375 Py_DECREF(func);
1376 Py_DECREF(args);
1377 return result;
1378}
1379
1380static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001381pattern_sub(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001382{
1383 PyObject* template;
1384 PyObject* string;
1385 PyObject* count;
1386 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1387 return NULL;
1388
1389 /* delegate to Python code */
1390 return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
1391}
1392
1393static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001394pattern_subn(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001395{
1396 PyObject* template;
1397 PyObject* string;
1398 PyObject* count;
1399 if (!PyArg_ParseTuple(args, "OOO", &template, &string, &count))
1400 return NULL;
1401
1402 /* delegate to Python code */
1403 return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
1404}
1405
1406static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001407pattern_split(PatternObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001408{
1409 PyObject* string;
1410 PyObject* maxsplit;
1411 if (!PyArg_ParseTuple(args, "OO", &string, &maxsplit))
1412 return NULL;
1413
1414 /* delegate to Python code */
1415 return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
1416}
1417
1418static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001419pattern_findall(PatternObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001420{
Guido van Rossumb700df92000-03-31 14:59:30 +00001421 SRE_STATE state;
1422 PyObject* string;
1423 PyObject* list;
1424 int status;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001425 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001426
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001427 string = state_init(&state, self, args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001428 if (!string)
1429 return NULL;
1430
1431 list = PyList_New(0);
1432
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001433 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001434
1435 PyObject* item;
1436
1437 state.ptr = state.start;
1438
1439 if (state.charsize == 1) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001440 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +00001441 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001442#if defined(HAVE_UNICODE)
1443 status = sre_usearch(&state, PatternObject_GetCode(self));
1444#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001445 }
1446
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001447 if (status > 0) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001448
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001449 /* don't bother to build a match object */
1450 switch (self->groups) {
1451 case 0:
1452 item = PySequence_GetSlice(
1453 string,
1454 ((char*) state.start - (char*) state.beginning) /
1455 state.charsize,
1456 ((char*) state.ptr - (char*) state.beginning) /
1457 state.charsize);
1458 if (!item)
1459 goto error;
1460 break;
1461 case 1:
1462 item = state_getslice(&state, 1, string);
1463 if (!item)
1464 goto error;
1465 break;
1466 default:
1467 item = PyTuple_New(self->groups);
1468 if (!item)
1469 goto error;
1470 for (i = 0; i < self->groups; i++) {
1471 PyObject* o = state_getslice(&state, i+1, string);
1472 if (!o) {
1473 Py_DECREF(item);
1474 goto error;
1475 }
1476 PyTuple_SET_ITEM(item, i, o);
1477 }
1478 break;
1479 }
1480
1481 if (PyList_Append(list, item) < 0) {
1482 Py_DECREF(item);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001483 goto error;
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001484 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001485
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001486 if (state.ptr == state.start)
1487 state.start = (void*) ((char*) state.ptr + state.charsize);
1488 else
1489 state.start = state.ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001490
1491 } else {
1492
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001493 if (status == 0)
1494 break;
1495
Guido van Rossumb700df92000-03-31 14:59:30 +00001496 /* internal error */
1497 PyErr_SetString(
1498 PyExc_RuntimeError,
1499 "internal error in regular expression engine"
1500 );
1501 goto error;
1502
1503 }
1504 }
1505
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001506 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001507 return list;
1508
1509error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001510 Py_DECREF(list);
1511 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001512 return NULL;
1513
1514}
1515
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001516static PyMethodDef pattern_methods[] = {
1517 {"match", (PyCFunction) pattern_match, 1},
1518 {"search", (PyCFunction) pattern_search, 1},
1519 {"sub", (PyCFunction) pattern_sub, 1},
1520 {"subn", (PyCFunction) pattern_subn, 1},
1521 {"split", (PyCFunction) pattern_split, 1},
1522 {"findall", (PyCFunction) pattern_findall, 1},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001523 /* experimental */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001524 {"scanner", (PyCFunction) pattern_scanner, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001525 {NULL, NULL}
1526};
1527
1528static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001529pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001530{
1531 PyObject* res;
1532
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001533 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001534
1535 if (res)
1536 return res;
1537
1538 PyErr_Clear();
1539
1540 /* attributes */
1541 if (!strcmp(name, "pattern")) {
1542 Py_INCREF(self->pattern);
1543 return self->pattern;
1544 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001545
1546 if (!strcmp(name, "flags"))
1547 return Py_BuildValue("i", self->flags);
1548
Fredrik Lundh01016fe2000-06-30 00:27:46 +00001549 if (!strcmp(name, "groups"))
1550 return Py_BuildValue("i", self->groups);
1551
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001552 if (!strcmp(name, "groupindex") && self->groupindex) {
1553 Py_INCREF(self->groupindex);
1554 return self->groupindex;
1555 }
1556
Guido van Rossumb700df92000-03-31 14:59:30 +00001557 PyErr_SetString(PyExc_AttributeError, name);
1558 return NULL;
1559}
1560
1561statichere PyTypeObject Pattern_Type = {
1562 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001563 0, "SRE_Pattern", sizeof(PatternObject), 0,
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001564 (destructor)pattern_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001565 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001566 (getattrfunc)pattern_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001567};
1568
1569/* -------------------------------------------------------------------- */
1570/* match methods */
1571
1572static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001573match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001574{
1575 Py_XDECREF(self->string);
1576 Py_DECREF(self->pattern);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001577 PyMem_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001578}
1579
1580static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001581match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001582{
1583 if (index < 0 || index >= self->groups) {
1584 /* raise IndexError if we were given a bad group number */
1585 PyErr_SetString(
1586 PyExc_IndexError,
1587 "no such group"
1588 );
1589 return NULL;
1590 }
1591
1592 if (self->string == Py_None || self->mark[index+index] < 0) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001593 /* return default value if the string or group is undefined */
1594 Py_INCREF(def);
1595 return def;
Guido van Rossumb700df92000-03-31 14:59:30 +00001596 }
1597
1598 return PySequence_GetSlice(
1599 self->string, self->mark[index+index], self->mark[index+index+1]
1600 );
1601}
1602
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001603static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001604match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001605{
1606 if (!PyInt_Check(index) && self->pattern->groupindex != NULL) {
1607 /* FIXME: resource leak? */
1608 index = PyObject_GetItem(self->pattern->groupindex, index);
1609 if (!index)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001610 return -1;
Guido van Rossumb700df92000-03-31 14:59:30 +00001611 }
1612
1613 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001614 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001615
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001616 return -1;
1617}
1618
1619static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001620match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001621{
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001622 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001623}
1624
1625static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001626match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001627{
1628 PyObject* result;
1629 int i, size;
1630
1631 size = PyTuple_GET_SIZE(args);
1632
1633 switch (size) {
1634 case 0:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001635 result = match_getslice(self, Py_False, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001636 break;
1637 case 1:
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001638 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001639 break;
1640 default:
1641 /* fetch multiple items */
1642 result = PyTuple_New(size);
1643 if (!result)
1644 return NULL;
1645 for (i = 0; i < size; i++) {
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001646 PyObject* item = match_getslice(
1647 self, PyTuple_GET_ITEM(args, i), Py_None
1648 );
Guido van Rossumb700df92000-03-31 14:59:30 +00001649 if (!item) {
1650 Py_DECREF(result);
1651 return NULL;
1652 }
1653 PyTuple_SET_ITEM(result, i, item);
1654 }
1655 break;
1656 }
1657 return result;
1658}
1659
1660static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001661match_groups(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001662{
1663 PyObject* result;
1664 int index;
1665
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001666 PyObject* def = Py_None;
1667 if (!PyArg_ParseTuple(args, "|O", &def))
1668 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001669
Guido van Rossumb700df92000-03-31 14:59:30 +00001670 result = PyTuple_New(self->groups-1);
1671 if (!result)
1672 return NULL;
1673
1674 for (index = 1; index < self->groups; index++) {
1675 PyObject* item;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001676 item = match_getslice_by_index(self, index, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001677 if (!item) {
1678 Py_DECREF(result);
1679 return NULL;
1680 }
1681 PyTuple_SET_ITEM(result, index-1, item);
1682 }
1683
1684 return result;
1685}
1686
1687static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001688match_groupdict(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001689{
1690 PyObject* result;
1691 PyObject* keys;
1692 int index;
1693
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001694 PyObject* def = Py_None;
1695 if (!PyArg_ParseTuple(args, "|O", &def))
1696 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001697
Guido van Rossumb700df92000-03-31 14:59:30 +00001698 result = PyDict_New();
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001699 if (!result || !self->pattern->groupindex)
Guido van Rossumb700df92000-03-31 14:59:30 +00001700 return result;
1701
1702 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001703 if (!keys) {
1704 Py_DECREF(result);
Guido van Rossumb700df92000-03-31 14:59:30 +00001705 return NULL;
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001706 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001707
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001708 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001709 PyObject* key;
1710 PyObject* item;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001711 key = PyList_GET_ITEM(keys, index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001712 if (!key) {
1713 Py_DECREF(keys);
1714 Py_DECREF(result);
1715 return NULL;
1716 }
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001717 item = match_getslice(self, key, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001718 if (!item) {
1719 Py_DECREF(key);
1720 Py_DECREF(keys);
1721 Py_DECREF(result);
1722 return NULL;
1723 }
1724 /* FIXME: <fl> this can fail, right? */
1725 PyDict_SetItem(result, key, item);
1726 }
1727
1728 Py_DECREF(keys);
1729
1730 return result;
1731}
1732
1733static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001734match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001735{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001736 int index;
1737
1738 PyObject* index_ = Py_False;
1739 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001740 return NULL;
1741
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001742 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001743
Guido van Rossumb700df92000-03-31 14:59:30 +00001744 if (index < 0 || index >= self->groups) {
1745 PyErr_SetString(
1746 PyExc_IndexError,
1747 "no such group"
1748 );
1749 return NULL;
1750 }
1751
1752 if (self->mark[index*2] < 0) {
1753 Py_INCREF(Py_None);
1754 return Py_None;
1755 }
1756
1757 return Py_BuildValue("i", self->mark[index*2]);
1758}
1759
1760static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001761match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001762{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001763 int index;
1764
1765 PyObject* index_ = Py_False;
1766 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001767 return NULL;
1768
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001769 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001770
Guido van Rossumb700df92000-03-31 14:59:30 +00001771 if (index < 0 || index >= self->groups) {
1772 PyErr_SetString(
1773 PyExc_IndexError,
1774 "no such group"
1775 );
1776 return NULL;
1777 }
1778
1779 if (self->mark[index*2] < 0) {
1780 Py_INCREF(Py_None);
1781 return Py_None;
1782 }
1783
1784 return Py_BuildValue("i", self->mark[index*2+1]);
1785}
1786
1787static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001788match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001789{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001790 int index;
1791
1792 PyObject* index_ = Py_False;
1793 if (!PyArg_ParseTuple(args, "|O", &index_))
Guido van Rossumb700df92000-03-31 14:59:30 +00001794 return NULL;
1795
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001796 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001797
Guido van Rossumb700df92000-03-31 14:59:30 +00001798 if (index < 0 || index >= self->groups) {
1799 PyErr_SetString(
1800 PyExc_IndexError,
1801 "no such group"
1802 );
1803 return NULL;
1804 }
1805
1806 if (self->mark[index*2] < 0) {
1807 Py_INCREF(Py_None);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001808 Py_INCREF(Py_None);
1809 return Py_BuildValue("OO", Py_None, Py_None);
Guido van Rossumb700df92000-03-31 14:59:30 +00001810 }
1811
1812 return Py_BuildValue("ii", self->mark[index*2], self->mark[index*2+1]);
1813}
1814
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001815static PyMethodDef match_methods[] = {
1816 {"group", (PyCFunction) match_group, 1},
1817 {"start", (PyCFunction) match_start, 1},
1818 {"end", (PyCFunction) match_end, 1},
1819 {"span", (PyCFunction) match_span, 1},
1820 {"groups", (PyCFunction) match_groups, 1},
1821 {"groupdict", (PyCFunction) match_groupdict, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001822 {NULL, NULL}
1823};
1824
1825static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001826match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00001827{
1828 PyObject* res;
1829
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001830 res = Py_FindMethod(match_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00001831 if (res)
1832 return res;
1833
1834 PyErr_Clear();
1835
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001836 /* attributes */
Guido van Rossumb700df92000-03-31 14:59:30 +00001837 if (!strcmp(name, "string")) {
1838 Py_INCREF(self->string);
1839 return self->string;
1840 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001841
Guido van Rossumb700df92000-03-31 14:59:30 +00001842 if (!strcmp(name, "re")) {
1843 Py_INCREF(self->pattern);
1844 return (PyObject*) self->pattern;
1845 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001846
Guido van Rossumb700df92000-03-31 14:59:30 +00001847 if (!strcmp(name, "pos"))
1848 return Py_BuildValue("i", 0); /* FIXME */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001849
Guido van Rossumb700df92000-03-31 14:59:30 +00001850 if (!strcmp(name, "endpos"))
1851 return Py_BuildValue("i", 0); /* FIXME */
1852
1853 PyErr_SetString(PyExc_AttributeError, name);
1854 return NULL;
1855}
1856
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
1859
1860statichere PyTypeObject Match_Type = {
1861 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001862 0, "SRE_Match",
Guido van Rossumb700df92000-03-31 14:59:30 +00001863 sizeof(MatchObject), /* size of basic object */
1864 sizeof(int), /* space for group item */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001865 (destructor)match_dealloc, /*tp_dealloc*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001866 0, /*tp_print*/
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001867 (getattrfunc)match_getattr, /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00001868};
1869
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001870/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001871/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001872
1873static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001874scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001875{
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001876 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001877 Py_DECREF(self->string);
1878 Py_DECREF(self->pattern);
1879 PyMem_DEL(self);
1880}
1881
1882static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001883scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001884{
1885 SRE_STATE* state = &self->state;
1886 PyObject* match;
1887 int status;
1888
1889 state->ptr = state->start;
1890
1891 if (state->charsize == 1) {
1892 status = sre_match(state, PatternObject_GetCode(self->pattern));
1893 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001894#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001895 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001896#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001897 }
1898
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001899 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001900 state, self->string, status);
1901
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001902 if (status == 0 || state->ptr == state->start)
1903 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001904 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001905 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001906
1907 return match;
1908}
1909
1910
1911static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001912scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001913{
1914 SRE_STATE* state = &self->state;
1915 PyObject* match;
1916 int status;
1917
1918 state->ptr = state->start;
1919
1920 if (state->charsize == 1) {
1921 status = sre_search(state, PatternObject_GetCode(self->pattern));
1922 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001923#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001924 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001925#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001926 }
1927
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001928 match = pattern_new_match((PatternObject*) self->pattern,
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001929 state, self->string, status);
1930
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001931 if (status == 0 || state->ptr == state->start)
1932 state->start = (void*) ((char*) state->ptr + state->charsize);
1933 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001934 state->start = state->ptr;
1935
1936 return match;
1937}
1938
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001939static PyMethodDef scanner_methods[] = {
1940 {"match", (PyCFunction) scanner_match, 0},
1941 {"search", (PyCFunction) scanner_search, 0},
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001942 {NULL, NULL}
1943};
1944
1945static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001946scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001947{
1948 PyObject* res;
1949
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001950 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001951 if (res)
1952 return res;
1953
1954 PyErr_Clear();
1955
1956 /* attributes */
1957 if (!strcmp(name, "pattern")) {
1958 Py_INCREF(self->pattern);
1959 return self->pattern;
1960 }
1961
1962 PyErr_SetString(PyExc_AttributeError, name);
1963 return NULL;
1964}
1965
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001966statichere PyTypeObject Scanner_Type = {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001967 PyObject_HEAD_INIT(NULL)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001968 0, "SRE_Scanner",
1969 sizeof(ScannerObject), /* size of basic object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001970 0,
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001971 (destructor)scanner_dealloc, /*tp_dealloc*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001972 0, /*tp_print*/
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001973 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001974};
1975
Guido van Rossumb700df92000-03-31 14:59:30 +00001976static PyMethodDef _functions[] = {
1977 {"compile", _compile, 1},
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001978 {"getcodesize", sre_codesize, 1},
Fredrik Lundhb389df32000-06-29 12:48:37 +00001979 {"getlower", sre_getlower, 1},
Guido van Rossumb700df92000-03-31 14:59:30 +00001980 {NULL, NULL}
1981};
1982
1983void
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001984#if defined(WIN32)
Guido van Rossumb700df92000-03-31 14:59:30 +00001985__declspec(dllexport)
1986#endif
1987init_sre()
1988{
1989 /* Patch object types */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001990 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001991 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001992
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001993 Py_InitModule("_" MODULE, _functions);
Guido van Rossumb700df92000-03-31 14:59:30 +00001994}
1995
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001996#endif /* !defined(SRE_RECURSIVE) */