blob: 619d39be6eab7da7baed036f0b419e248245fe39 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of
 * 2000-08-01 fl  fixes for 1.6b1
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-09-20 fl  added expand method
 * 2001-03-20 fl  lots of fixes for 2.1b2
 * 2001-04-15 fl  export copyright as Python attribute, not global
 * 2001-04-28 fl  added __copy__ methods (work in progress)
 * 2001-05-14 fl  fixes for 1.5.2 compatibility
 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl  added sub/subn primitive
 * 2001-10-24 fl  added finditer primitive (for 2.2 only)
 * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl  fixed empty sub/subn return type
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
34
35#ifndef SRE_RECURSIVE
36
/* version/copyright banner for this module (see the 2001-04-15 history
   entry: exported as a Python attribute rather than a global) */
static char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000039
40#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000041#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000042
43#include "sre.h"
44
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000045#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000046
/* name of this module, minus the leading underscore.  may be overridden
   by defining SRE_MODULE before this point */
#if !defined(SRE_MODULE)
#define SRE_MODULE "sre"
#endif

/* defining this one enables tracing (see the TRACE macro) */
#undef VERBOSE

/* unicode support is compiled in for Python 1.6 and later; from 2.2 on
   it is only enabled when the interpreter defines Py_USING_UNICODE */
#if PY_VERSION_HEX >= 0x01060000
#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
/* defining this enables unicode support (default under 1.6a1 and later) */
#define HAVE_UNICODE
#endif
#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000061
/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings).  the
   fixed limit is only used when USE_STACKCHECK has not been defined */

#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

/* Python versions before 1.6: map PyObject_DEL onto PyMem_DEL */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif
90
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091/* -------------------------------------------------------------------- */
92
/* LOCAL(type) declares a file-local function returning <type>, using
   the cheapest calling/inlining convention the compiler offers */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
103
/* error codes (all negative; the matcher returns <0 for errors).  the
   values are parenthesized so they expand safely inside any expression */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */
109
/* TRACE((fmt, ...)) -- note the double parentheses: the single macro
   argument is a complete printf argument list.  expands to nothing
   (arguments are not evaluated) unless VERBOSE is defined */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
115
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000116/* -------------------------------------------------------------------- */
117/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000118
/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored per character in sre_char_info */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* category flags for character codes 0..127, 16 entries per row */
static char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lower-case mapping for character codes 0..127, 16 entries per row
   (65..90 map to 97..122, everything else maps to itself) */
static char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* each predicate yields the (non-zero) mask bit when the flag is set for
   <ch>, and 0 for any code >= 128.  note that <ch> is evaluated twice,
   and is used directly as a table index, so it must not be negative */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* table-driven lower-casing: codes below 128 go through sre_char_lower,
   all other codes are returned unchanged */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? sre_char_lower[ch] : ch);
}
162
/* locale-specific character predicates.  codes below 256 are handed to
   the <ctype.h> classifiers; anything larger is never a member.  <ch>
   may be evaluated more than once */

#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* lower-casing via tolower() for codes below 256; larger codes are
   returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? tolower((ch)) : ch);
}
175
/* unicode-specific character predicates (only compiled when
   HAVE_UNICODE is defined).  each one casts <ch> to Py_UNICODE and
   defers to the corresponding Py_UNICODE_* macro */

#if defined(HAVE_UNICODE)

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* lower-casing via Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif
192
Guido van Rossumb700df92000-03-31 14:59:30 +0000193LOCAL(int)
194sre_category(SRE_CODE category, unsigned int ch)
195{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000196 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000197
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 case SRE_CATEGORY_DIGIT:
199 return SRE_IS_DIGIT(ch);
200 case SRE_CATEGORY_NOT_DIGIT:
201 return !SRE_IS_DIGIT(ch);
202 case SRE_CATEGORY_SPACE:
203 return SRE_IS_SPACE(ch);
204 case SRE_CATEGORY_NOT_SPACE:
205 return !SRE_IS_SPACE(ch);
206 case SRE_CATEGORY_WORD:
207 return SRE_IS_WORD(ch);
208 case SRE_CATEGORY_NOT_WORD:
209 return !SRE_IS_WORD(ch);
210 case SRE_CATEGORY_LINEBREAK:
211 return SRE_IS_LINEBREAK(ch);
212 case SRE_CATEGORY_NOT_LINEBREAK:
213 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000214
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000215 case SRE_CATEGORY_LOC_WORD:
216 return SRE_LOC_IS_WORD(ch);
217 case SRE_CATEGORY_LOC_NOT_WORD:
218 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000219
220#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000221 case SRE_CATEGORY_UNI_DIGIT:
222 return SRE_UNI_IS_DIGIT(ch);
223 case SRE_CATEGORY_UNI_NOT_DIGIT:
224 return !SRE_UNI_IS_DIGIT(ch);
225 case SRE_CATEGORY_UNI_SPACE:
226 return SRE_UNI_IS_SPACE(ch);
227 case SRE_CATEGORY_UNI_NOT_SPACE:
228 return !SRE_UNI_IS_SPACE(ch);
229 case SRE_CATEGORY_UNI_WORD:
230 return SRE_UNI_IS_WORD(ch);
231 case SRE_CATEGORY_UNI_NOT_WORD:
232 return !SRE_UNI_IS_WORD(ch);
233 case SRE_CATEGORY_UNI_LINEBREAK:
234 return SRE_UNI_IS_LINEBREAK(ch);
235 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
236 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000237#else
238 case SRE_CATEGORY_UNI_DIGIT:
239 return SRE_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_NOT_DIGIT:
241 return !SRE_IS_DIGIT(ch);
242 case SRE_CATEGORY_UNI_SPACE:
243 return SRE_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_NOT_SPACE:
245 return !SRE_IS_SPACE(ch);
246 case SRE_CATEGORY_UNI_WORD:
247 return SRE_LOC_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_NOT_WORD:
249 return !SRE_LOC_IS_WORD(ch);
250 case SRE_CATEGORY_UNI_LINEBREAK:
251 return SRE_IS_LINEBREAK(ch);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
253 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000254#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000255 }
256 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000257}
258
259/* helpers */
260
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000261static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000262mark_fini(SRE_STATE* state)
263{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000264 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000265 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000266 state->mark_stack = NULL;
267 }
268 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000269}
270
271static int
272mark_save(SRE_STATE* state, int lo, int hi)
273{
274 void* stack;
275 int size;
276 int minsize, newsize;
277
278 if (hi <= lo)
279 return 0;
280
281 size = (hi - lo) + 1;
282
283 newsize = state->mark_stack_size;
284 minsize = state->mark_stack_base + size;
285
286 if (newsize < minsize) {
287 /* create new stack */
288 if (!newsize) {
289 newsize = 512;
290 if (newsize < minsize)
291 newsize = minsize;
292 TRACE(("allocate stack %d\n", newsize));
293 stack = malloc(sizeof(void*) * newsize);
294 } else {
295 /* grow the stack */
296 while (newsize < minsize)
297 newsize += newsize;
298 TRACE(("grow stack to %d\n", newsize));
299 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
300 }
301 if (!stack) {
302 mark_fini(state);
303 return SRE_ERROR_MEMORY;
304 }
305 state->mark_stack = stack;
306 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000307 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000308
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000309 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000310
311 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
312 size * sizeof(void*));
313
314 state->mark_stack_base += size;
315
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000316 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000317}
318
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000319static int
320mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000321{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000322 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000323
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000324 if (hi <= lo)
325 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000326
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000327 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000328
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000329 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000330
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000331 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000332
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000333 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
334 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000335
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000336 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000337}
338
Neal Norwitzaddfe0c2002-11-10 14:33:26 +0000339static void
340lastmark_restore(SRE_STATE *state, int lastmark)
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000341{
342 if (state->lastmark > lastmark) {
343 memset(
344 state->mark + lastmark + 1, 0,
345 (state->lastmark - lastmark) * sizeof(void*)
346 );
347 state->lastmark = lastmark;
348 state->lastindex = (lastmark == 0) ? -1 : (lastmark-1)/2+1;
349 }
350}
351
/* generate 8-bit version */

/* the matching engine further down is written in terms of these names.
   with unicode support the file includes itself once (SRE_RECURSIVE
   defined) to compile the engine for 8-bit characters, then rebinds
   the names so the rest of this pass compiles the Py_UNICODE variant */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template

#if defined(HAVE_UNICODE)

#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000389
390#endif /* SRE_RECURSIVE */
391
392/* -------------------------------------------------------------------- */
393/* String matching engine */
394
395/* the following section is compiled twice, with different character
396 settings */
397
398LOCAL(int)
399SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
400{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000402
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000403 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000404
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000405 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000408 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 case SRE_AT_BEGINNING_LINE:
412 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000413 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000414
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000415 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000416 return (((void*) (ptr+1) == state->end &&
417 SRE_IS_LINEBREAK((int) ptr[0])) ||
418 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000420 case SRE_AT_END_LINE:
421 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000422 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000423
Fredrik Lundh770617b2001-01-14 15:06:11 +0000424 case SRE_AT_END_STRING:
425 return ((void*) ptr == state->end);
426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000427 case SRE_AT_BOUNDARY:
428 if (state->beginning == state->end)
429 return 0;
430 that = ((void*) ptr > state->beginning) ?
431 SRE_IS_WORD((int) ptr[-1]) : 0;
432 this = ((void*) ptr < state->end) ?
433 SRE_IS_WORD((int) ptr[0]) : 0;
434 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000435
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000436 case SRE_AT_NON_BOUNDARY:
437 if (state->beginning == state->end)
438 return 0;
439 that = ((void*) ptr > state->beginning) ?
440 SRE_IS_WORD((int) ptr[-1]) : 0;
441 this = ((void*) ptr < state->end) ?
442 SRE_IS_WORD((int) ptr[0]) : 0;
443 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000444
445 case SRE_AT_LOC_BOUNDARY:
446 if (state->beginning == state->end)
447 return 0;
448 that = ((void*) ptr > state->beginning) ?
449 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
450 this = ((void*) ptr < state->end) ?
451 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
452 return this != that;
453
454 case SRE_AT_LOC_NON_BOUNDARY:
455 if (state->beginning == state->end)
456 return 0;
457 that = ((void*) ptr > state->beginning) ?
458 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
459 this = ((void*) ptr < state->end) ?
460 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
461 return this == that;
462
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000463#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000464 case SRE_AT_UNI_BOUNDARY:
465 if (state->beginning == state->end)
466 return 0;
467 that = ((void*) ptr > state->beginning) ?
468 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
469 this = ((void*) ptr < state->end) ?
470 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
471 return this != that;
472
473 case SRE_AT_UNI_NON_BOUNDARY:
474 if (state->beginning == state->end)
475 return 0;
476 that = ((void*) ptr > state->beginning) ?
477 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
478 this = ((void*) ptr < state->end) ?
479 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
480 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000481#endif
482
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000483 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000484
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000485 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000486}
487
488LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000489SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000490{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000491 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000492
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000493 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000494
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000495 for (;;) {
496 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000497
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000498 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000499 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 if (ch == set[0])
501 return ok;
502 set++;
503 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000504
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000505 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000506 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000507 if (set[0] <= ch && ch <= set[1])
508 return ok;
509 set += 2;
510 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000511
Fredrik Lundh3562f112000-07-02 12:00:07 +0000512 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000513 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000514 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
515 return ok;
516 set += 16;
517 break;
518
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000519 case SRE_OP_BIGCHARSET:
520 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
521 {
522 int count, block;
523 count = *(set++);
524 block = ((unsigned char*)set)[ch >> 8];
525 set += 128;
526 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
527 return ok;
528 set += count*16;
529 break;
530 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000531
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000532 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000533 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000534 if (sre_category(set[0], (int) ch))
535 return ok;
536 set += 1;
537 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000538
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000539 case SRE_OP_NEGATE:
540 ok = !ok;
541 break;
542
543 case SRE_OP_FAILURE:
544 return !ok;
545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000546 default:
547 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000548 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000549 return 0;
550 }
551 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000552}
553
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000554LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
555
556LOCAL(int)
557SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
558{
559 SRE_CODE chr;
560 SRE_CHAR* ptr = state->ptr;
561 SRE_CHAR* end = state->end;
562 int i;
563
564 /* adjust end */
565 if (maxcount < end - ptr && maxcount != 65535)
566 end = ptr + maxcount;
567
568 switch (pattern[0]) {
569
570 case SRE_OP_ANY:
571 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000572 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000573 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
574 ptr++;
575 break;
576
577 case SRE_OP_ANY_ALL:
578 /* repeated dot wildcare. skip to the end of the target
579 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000581 ptr = end;
582 break;
583
584 case SRE_OP_LITERAL:
585 /* repeated literal */
586 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000587 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000588 while (ptr < end && (SRE_CODE) *ptr == chr)
589 ptr++;
590 break;
591
592 case SRE_OP_LITERAL_IGNORE:
593 /* repeated literal */
594 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000595 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000596 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
597 ptr++;
598 break;
599
600 case SRE_OP_NOT_LITERAL:
601 /* repeated non-literal */
602 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000603 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000604 while (ptr < end && (SRE_CODE) *ptr != chr)
605 ptr++;
606 break;
607
608 case SRE_OP_NOT_LITERAL_IGNORE:
609 /* repeated non-literal */
610 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000612 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
613 ptr++;
614 break;
615
616 case SRE_OP_IN:
617 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000618 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
619 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000620 ptr++;
621 break;
622
623 default:
624 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000625 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000626 while ((SRE_CHAR*) state->ptr < end) {
627 i = SRE_MATCH(state, pattern, level);
628 if (i < 0)
629 return i;
630 if (!i)
631 break;
632 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000633 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
634 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000635 return (SRE_CHAR*) state->ptr - ptr;
636 }
637
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000638 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000639 return ptr - (SRE_CHAR*) state->ptr;
640}
641
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000669
670LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000671SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000672{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000673 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 SRE_CHAR* end = state->end;
677 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000679 SRE_REPEAT* rp;
680 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000681 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000682
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000684
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000686
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000687#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000688 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000689 return SRE_ERROR_RECURSION_LIMIT;
690#endif
691
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000692#if defined(USE_RECURSION_LIMIT)
693 if (level > USE_RECURSION_LIMIT)
694 return SRE_ERROR_RECURSION_LIMIT;
695#endif
696
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000697 if (pattern[0] == SRE_OP_INFO) {
698 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000699 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000700 if (pattern[3] && (end - ptr) < pattern[3]) {
701 TRACE(("reject (got %d chars, need %d)\n",
702 (end - ptr), pattern[3]));
703 return 0;
704 }
705 pattern += pattern[1] + 1;
706 }
707
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000708 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000709
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000711
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000712 case SRE_OP_FAILURE:
713 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000714 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000715 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000716
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000717 case SRE_OP_SUCCESS:
718 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000719 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000720 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000721 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000722
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000723 case SRE_OP_AT:
724 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000725 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000726 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000728 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 pattern++;
730 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000731
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 case SRE_OP_CATEGORY:
733 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000734 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000735 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000737 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000739 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000741
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 case SRE_OP_LITERAL:
743 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000744 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000745 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000747 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 pattern++;
749 ptr++;
750 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 case SRE_OP_NOT_LITERAL:
753 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000754 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000755 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000756 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000757 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000758 pattern++;
759 ptr++;
760 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000761
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000763 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000764 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000765 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000766 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
767 return 0;
768 ptr++;
769 break;
770
771 case SRE_OP_ANY_ALL:
772 /* match anything */
773 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000774 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000775 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000776 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000777 ptr++;
778 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000779
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 case SRE_OP_IN:
781 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000782 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000783 TRACE(("|%p|%p|IN\n", pattern, ptr));
784 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000785 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000786 pattern += pattern[0];
787 ptr++;
788 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000789
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000790 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000791 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000792 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 i = pattern[0];
794 {
795 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
796 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
797 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000798 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000799 while (p < e) {
800 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000801 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000802 p++; ptr++;
803 }
804 }
805 pattern++;
806 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000807
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000808 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000809 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000810 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000811 i = pattern[0];
812 {
813 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
814 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
815 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000816 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 while (p < e) {
818 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000819 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000820 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000821 p++; ptr++;
822 }
823 }
824 pattern++;
825 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000826
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000827 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000828 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000829 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000830 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000831 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000832 pattern++;
833 ptr++;
834 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000835
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000836 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000837 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000838 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000839 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000840 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000841 pattern++;
842 ptr++;
843 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000845 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000846 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000847 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000848 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000849 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000850 pattern += pattern[0];
851 ptr++;
852 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000853
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000854 case SRE_OP_MARK:
855 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000856 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000857 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000858 i = pattern[0];
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000859 if (i > state->lastmark) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000860 state->lastmark = i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000861 if (i & 1)
862 state->lastindex = i/2 + 1;
863 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000864 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000865 pattern++;
866 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000867
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000868 case SRE_OP_JUMP:
869 case SRE_OP_INFO:
870 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000871 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000872 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000873 pattern += pattern[0];
874 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000875
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000876 case SRE_OP_ASSERT:
877 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000878 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000879 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000880 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000881 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000882 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000883 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000884 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000885 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000886 pattern += pattern[0];
887 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000888
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000889 case SRE_OP_ASSERT_NOT:
890 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000891 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000892 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000893 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000894 if (state->ptr >= state->beginning) {
895 i = SRE_MATCH(state, pattern + 2, level + 1);
896 if (i < 0)
897 return i;
898 if (i)
899 return 0;
900 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000901 pattern += pattern[0];
902 break;
903
904 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000905 /* alternation */
906 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000907 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000908 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000909 for (; pattern[0]; pattern += pattern[0]) {
910 if (pattern[1] == SRE_OP_LITERAL &&
911 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
912 continue;
913 if (pattern[1] == SRE_OP_IN &&
914 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
915 continue;
916 state->ptr = ptr;
917 i = SRE_MATCH(state, pattern + 1, level + 1);
918 if (i)
919 return i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000920 lastmark_restore(state, lastmark);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000921 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000922 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000923
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000924 case SRE_OP_REPEAT_ONE:
925 /* match repeated sequence (maximizing regexp) */
926
927 /* this operator only works if the repeated item is
928 exactly one character wide, and we're not already
929 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000930 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000931
932 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
933
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000934 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000935 pattern[1], pattern[2]));
936
Fredrik Lundhe1869832000-08-01 22:47:49 +0000937 if (ptr + pattern[1] > end)
938 return 0; /* cannot match */
939
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000940 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000941
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000942 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
943 if (count < 0)
944 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000945
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000946 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000947
948 /* when we arrive here, count contains the number of
949 matches, and ptr points to the tail of the target
950 string. check if the rest of the pattern matches,
951 and backtrack if not. */
952
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000953 if (count < (int) pattern[1])
954 return 0;
955
956 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
957 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000958 state->ptr = ptr;
959 return 1;
960
961 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
962 /* tail starts with a literal. skip positions where
963 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000964 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000965 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000966 while (count >= (int) pattern[1] &&
967 (ptr >= end || *ptr != chr)) {
968 ptr--;
969 count--;
970 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000971 if (count < (int) pattern[1])
972 break;
973 state->ptr = ptr;
974 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000975 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000976 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000977 ptr--;
978 count--;
979 }
980
981 } else {
982 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000983 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000984 while (count >= (int) pattern[1]) {
985 state->ptr = ptr;
986 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000987 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000988 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000989 ptr--;
990 count--;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000991 lastmark_restore(state, lastmark);
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000992 }
993 }
994 return 0;
995
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000996 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000997 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +0000998 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000999 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001000 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001001 pattern[1], pattern[2]));
1002
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001003 rep.count = -1;
1004 rep.pattern = pattern;
1005
1006 /* install new repeat context */
1007 rep.prev = state->repeat;
1008 state->repeat = &rep;
1009
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001010 state->ptr = ptr;
1011 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001012
1013 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001014
1015 return i;
1016
1017 case SRE_OP_MAX_UNTIL:
1018 /* maximizing repeat */
1019 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1020
1021 /* FIXME: we probably need to deal with zero-width
1022 matches in here... */
1023
1024 rp = state->repeat;
1025 if (!rp)
1026 return SRE_ERROR_STATE;
1027
1028 state->ptr = ptr;
1029
1030 count = rp->count + 1;
1031
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001032 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001033
1034 if (count < rp->pattern[1]) {
1035 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001036 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001037 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001038 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001039 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001040 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001041 rp->count = count - 1;
1042 state->ptr = ptr;
1043 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001044 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001045
1046 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001047 /* we may have enough matches, but if we can
1048 match another item, do so */
1049 rp->count = count;
1050 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001051 i = mark_save(state, 0, lastmark);
1052 if (i < 0)
1053 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001054 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001055 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001056 if (i)
1057 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001058 i = mark_restore(state, 0, lastmark);
1059 if (i < 0)
1060 return i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +00001061 lastmark_restore(state, lastmark);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001062 rp->count = count - 1;
1063 state->ptr = ptr;
1064 }
1065
1066 /* cannot match more repeated items here. make sure the
1067 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001068 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001069 i = SRE_MATCH(state, pattern, level + 1);
1070 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001071 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001072 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001073 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001074 return 0;
1075
1076 case SRE_OP_MIN_UNTIL:
1077 /* minimizing repeat */
1078 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1079
1080 rp = state->repeat;
1081 if (!rp)
1082 return SRE_ERROR_STATE;
1083
1084 count = rp->count + 1;
1085
Fredrik Lundh770617b2001-01-14 15:06:11 +00001086 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1087 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001088
1089 state->ptr = ptr;
1090
1091 if (count < rp->pattern[1]) {
1092 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001093 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001094 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001095 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001096 if (i)
1097 return i;
1098 rp->count = count-1;
1099 state->ptr = ptr;
1100 return 0;
1101 }
1102
1103 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001104 state->repeat = rp->prev;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001105 i = SRE_MATCH(state, pattern, level + 1);
1106 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001107 return i;
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001108
Fredrik Lundh770617b2001-01-14 15:06:11 +00001109 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001110 state->repeat = rp;
1111
1112 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1113 return 0;
1114
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001115 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001116 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001117 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001118 if (i)
1119 return i;
1120 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001121 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001122 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001123
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001124 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001125 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001126 return SRE_ERROR_ILLEGAL;
1127 }
1128 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001129
Sjoerd Mullender89dfe9e2001-08-30 14:37:07 +00001130 /* can't end up here */
Fredrik Lundh21009b92001-09-18 18:47:09 +00001131 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
Guido van Rossumb700df92000-03-31 14:59:30 +00001132}
1133
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001134LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001135SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1136{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001137 SRE_CHAR* ptr = state->start;
1138 SRE_CHAR* end = state->end;
1139 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001140 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001141 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001142 SRE_CODE* prefix = NULL;
1143 SRE_CODE* charset = NULL;
1144 SRE_CODE* overlap = NULL;
1145 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001146
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001147 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001148 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001149 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001150
1151 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001152
1153 if (pattern[3] > 0) {
1154 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001155 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001156 end -= pattern[3]-1;
1157 if (end <= ptr)
1158 end = ptr+1;
1159 }
1160
Fredrik Lundh3562f112000-07-02 12:00:07 +00001161 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001162 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001163 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001164 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001165 prefix_skip = pattern[6];
1166 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001167 overlap = prefix + prefix_len - 1;
1168 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001169 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001170 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001171 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001172
1173 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001174 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001175
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001176 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1177 TRACE(("charset = %p\n", charset));
1178
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001179#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001180 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001181 /* pattern starts with a known prefix. use the overlap
1182 table to skip forward as fast as we possibly can */
1183 int i = 0;
1184 end = state->end;
1185 while (ptr < end) {
1186 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001187 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001188 if (!i)
1189 break;
1190 else
1191 i = overlap[i];
1192 } else {
1193 if (++i == prefix_len) {
1194 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001195 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1196 state->start = ptr + 1 - prefix_len;
1197 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001198 if (flags & SRE_INFO_LITERAL)
1199 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001200 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001201 if (status != 0)
1202 return status;
1203 /* close but no cigar -- try again */
1204 i = overlap[i];
1205 }
1206 break;
1207 }
1208
1209 }
1210 ptr++;
1211 }
1212 return 0;
1213 }
1214#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001215
Fredrik Lundh3562f112000-07-02 12:00:07 +00001216 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001217 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001218 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001219 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001220 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001221 for (;;) {
1222 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1223 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001224 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001225 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001226 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001227 state->start = ptr;
1228 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001229 if (flags & SRE_INFO_LITERAL)
1230 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001231 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001232 if (status != 0)
1233 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001234 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001235 } else if (charset) {
1236 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001237 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001238 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001239 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001240 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001241 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001243 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001244 state->start = ptr;
1245 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001246 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001247 if (status != 0)
1248 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001249 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001250 }
1251 } else
1252 /* general case */
1253 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001254 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001255 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001256 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001257 if (status != 0)
1258 break;
1259 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001260
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001261 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001262}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001263
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001264LOCAL(int)
1265SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1266{
1267 /* check if given string is a literal template (i.e. no escapes) */
1268 while (len-- > 0)
1269 if (*ptr++ == '\\')
1270 return 0;
1271 return 1;
1272}
Guido van Rossumb700df92000-03-31 14:59:30 +00001273
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001274#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001275
/* -------------------------------------------------------------------- */
/* factories and destructors */

/* see sre.h for object declarations */
1280
Jeremy Hylton938ace62002-07-17 16:30:39 +00001281static PyTypeObject Pattern_Type;
1282static PyTypeObject Match_Type;
1283static PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001284
1285static PyObject *
1286_compile(PyObject* self_, PyObject* args)
1287{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001288 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001289
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001290 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001291 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001292
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001293 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001294 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001295 PyObject* code;
1296 int groups = 0;
1297 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001298 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001299 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1300 &PyList_Type, &code, &groups,
1301 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001302 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001303
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001304 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001305
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001306 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001307 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001308 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001309
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001310 self->codesize = n;
1311
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001312 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001313 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001314 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001315 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001316
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001317 if (PyErr_Occurred()) {
1318 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001319 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001320 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001321
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001322 Py_INCREF(pattern);
1323 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001324
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001325 self->flags = flags;
1326
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001327 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001328
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001329 Py_XINCREF(groupindex);
1330 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001331
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001332 Py_XINCREF(indexgroup);
1333 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001334
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001335 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001336}
1337
1338static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001339sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001340{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001341 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001342}
1343
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001344static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001345sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001346{
1347 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001348 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001349 return NULL;
1350 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001351 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001352 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001353#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001354 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001355#else
1356 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001357#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001358 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001359}
1360
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001361LOCAL(void)
1362state_reset(SRE_STATE* state)
1363{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001364 state->lastmark = 0;
1365
1366 /* FIXME: dynamic! */
Neal Norwitz35fc7602002-06-13 21:11:11 +00001367 memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001368
1369 state->lastindex = -1;
1370
1371 state->repeat = NULL;
1372
1373 mark_fini(state);
1374}
1375
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001376static void*
1377getstring(PyObject* string, int* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001378{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001379 /* given a python object, return a data pointer, a length (in
1380 characters), and a character size. return NULL if the object
1381 is not a string (or not compatible) */
1382
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001383 PyBufferProcs *buffer;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001384 int size, bytes, charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001385 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001386
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001387#if defined(HAVE_UNICODE)
1388 if (PyUnicode_Check(string)) {
1389 /* unicode strings doesn't always support the buffer interface */
1390 ptr = (void*) PyUnicode_AS_DATA(string);
1391 bytes = PyUnicode_GET_DATA_SIZE(string);
1392 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001393 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001394
1395 } else {
1396#endif
1397
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001398 /* get pointer to string buffer */
1399 buffer = string->ob_type->tp_as_buffer;
1400 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1401 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001402 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001403 return NULL;
1404 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001405
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001406 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001407 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1408 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001409 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1410 return NULL;
1411 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001413 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001414#if PY_VERSION_HEX >= 0x01060000
1415 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001416#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001417 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001418#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001419
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001420 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001421 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001422#if defined(HAVE_UNICODE)
1423 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001424 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001425#endif
1426 else {
1427 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1428 return NULL;
1429 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001430
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001431#if defined(HAVE_UNICODE)
1432 }
1433#endif
1434
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001435 *p_length = size;
1436 *p_charsize = charsize;
1437
1438 return ptr;
1439}
1440
1441LOCAL(PyObject*)
1442state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1443 int start, int end)
1444{
1445 /* prepare state object */
1446
1447 int length;
1448 int charsize;
1449 void* ptr;
1450
1451 memset(state, 0, sizeof(SRE_STATE));
1452
1453 state->lastindex = -1;
1454
1455 ptr = getstring(string, &length, &charsize);
1456 if (!ptr)
1457 return NULL;
1458
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001459 /* adjust boundaries */
1460 if (start < 0)
1461 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001462 else if (start > length)
1463 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001464
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001465 if (end < 0)
1466 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001467 else if (end > length)
1468 end = length;
1469
1470 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001471
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001472 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001473
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001474 state->start = (void*) ((char*) ptr + start * state->charsize);
1475 state->end = (void*) ((char*) ptr + end * state->charsize);
1476
1477 Py_INCREF(string);
1478 state->string = string;
1479 state->pos = start;
1480 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001481
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001482 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001483 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001484 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001485#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001486 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001487#else
1488 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001489#endif
1490 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001491 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001492
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001493 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001494}
1495
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001496LOCAL(void)
1497state_fini(SRE_STATE* state)
1498{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001499 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001500 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001501}
1502
/* convert a pointer into the string data to a character index, counted
   from the beginning of the string */
#define STATE_OFFSET(st, where)\
    (((char*)(where) - (char*)(st)->beginning) / (st)->charsize)
1506
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001507LOCAL(PyObject*)
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001508state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001509{
Fredrik Lundh58100642000-08-09 09:14:35 +00001510 int i, j;
1511
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001512 index = (index - 1) * 2;
1513
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001514 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001515 if (empty)
1516 /* want empty string */
1517 i = j = 0;
1518 else {
1519 Py_INCREF(Py_None);
1520 return Py_None;
1521 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001522 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001523 i = STATE_OFFSET(state, state->mark[index]);
1524 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001525 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001526
Fredrik Lundh58100642000-08-09 09:14:35 +00001527 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001528}
1529
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001530static void
1531pattern_error(int status)
1532{
1533 switch (status) {
1534 case SRE_ERROR_RECURSION_LIMIT:
1535 PyErr_SetString(
1536 PyExc_RuntimeError,
1537 "maximum recursion limit exceeded"
1538 );
1539 break;
1540 case SRE_ERROR_MEMORY:
1541 PyErr_NoMemory();
1542 break;
1543 default:
1544 /* other error codes indicate compiler/engine bugs */
1545 PyErr_SetString(
1546 PyExc_RuntimeError,
1547 "internal error in regular expression engine"
1548 );
1549 }
1550}
1551
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001552static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001553pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001554{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001555 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001556
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001557 MatchObject* match;
1558 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001559 char* base;
1560 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001561
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001562 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001563
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001564 /* create match object (with room for extra group marks) */
1565 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001566 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001567 if (!match)
1568 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001569
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001570 Py_INCREF(pattern);
1571 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001572
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001573 Py_INCREF(state->string);
1574 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001575
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001576 match->regs = NULL;
1577 match->groups = pattern->groups+1;
1578
1579 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001580
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001581 base = (char*) state->beginning;
1582 n = state->charsize;
1583
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001584 match->mark[0] = ((char*) state->start - base) / n;
1585 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001586
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001587 for (i = j = 0; i < pattern->groups; i++, j+=2)
1588 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1589 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1590 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1591 } else
1592 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1593
1594 match->pos = state->pos;
1595 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001596
Fredrik Lundh6f013982000-07-03 18:44:21 +00001597 match->lastindex = state->lastindex;
1598
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001599 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001600
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001601 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001602
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001603 /* no match */
1604 Py_INCREF(Py_None);
1605 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001606
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001607 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001608
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001609 /* internal error */
1610 pattern_error(status);
1611 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001612}
1613
1614static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001615pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001616{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001617 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001618
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001619 ScannerObject* self;
1620
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001621 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001622 int start = 0;
1623 int end = INT_MAX;
1624 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1625 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001626
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001627 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001628 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001629 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001630 return NULL;
1631
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001632 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001633 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001634 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001635 return NULL;
1636 }
1637
1638 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001639 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001640
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001641 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001642}
1643
Guido van Rossumb700df92000-03-31 14:59:30 +00001644static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001645pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001646{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001647 Py_XDECREF(self->pattern);
1648 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001649 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001650 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001651}
1652
1653static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001654pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001655{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001656 SRE_STATE state;
1657 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001658
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001659 PyObject* string;
1660 int start = 0;
1661 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001662 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1663 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1664 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001665 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001666
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001667 string = state_init(&state, self, string, start, end);
1668 if (!string)
1669 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001670
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001671 state.ptr = state.start;
1672
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001673 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1674
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001675 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001676 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001677 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001678#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001679 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001680#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001681 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001682
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001683 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1684
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001685 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001686
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001687 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001688}
1689
1690static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001691pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001692{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 SRE_STATE state;
1694 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001695
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001696 PyObject* string;
1697 int start = 0;
1698 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001699 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1700 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1701 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001703
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001704 string = state_init(&state, self, string, start, end);
1705 if (!string)
1706 return NULL;
1707
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001708 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1709
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001710 if (state.charsize == 1) {
1711 status = sre_search(&state, PatternObject_GetCode(self));
1712 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001713#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001714 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001715#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001716 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001717
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001718 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1719
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001720 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001721
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001722 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001723}
1724
1725static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001726call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001727{
1728 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001729 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001730 PyObject* func;
1731 PyObject* result;
1732
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001733 if (!args)
1734 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001735 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001736 if (!name)
1737 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001738 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001739 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001740 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001741 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001742 func = PyObject_GetAttrString(mod, function);
1743 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001744 if (!func)
1745 return NULL;
1746 result = PyObject_CallObject(func, args);
1747 Py_DECREF(func);
1748 Py_DECREF(args);
1749 return result;
1750}
1751
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object with the result of copy.deepcopy(*object, memo).
       returns 1 on success; on failure returns 0 and leaves *object
       untouched */

    PyObject* duplicate = call(
        "copy", "deepcopy",
        Py_BuildValue("OO", *object, memo)
        );

    if (duplicate) {
        /* swap in the copy, dropping our reference to the original */
        Py_DECREF(*object);
        *object = duplicate;
        return 1; /* success */
    }

    return 0;
}
#endif
1771
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001772static PyObject*
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00001773join_list(PyObject* list, PyObject* pattern)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001774{
1775 /* join list elements */
1776
1777 PyObject* joiner;
1778#if PY_VERSION_HEX >= 0x01060000
1779 PyObject* function;
1780 PyObject* args;
1781#endif
1782 PyObject* result;
1783
1784 switch (PyList_GET_SIZE(list)) {
1785 case 0:
1786 Py_DECREF(list);
Fredrik Lundh09705f02002-11-22 12:46:35 +00001787 return PySequence_GetSlice(pattern, 0, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001788 case 1:
1789 result = PyList_GET_ITEM(list, 0);
1790 Py_INCREF(result);
1791 Py_DECREF(list);
1792 return result;
1793 }
1794
1795 /* two or more elements: slice out a suitable separator from the
1796 first member, and use that to join the entire list */
1797
1798 joiner = PySequence_GetSlice(pattern, 0, 0);
1799 if (!joiner)
1800 return NULL;
1801
1802#if PY_VERSION_HEX >= 0x01060000
1803 function = PyObject_GetAttrString(joiner, "join");
1804 if (!function) {
1805 Py_DECREF(joiner);
1806 return NULL;
1807 }
1808 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001809 if (!args) {
1810 Py_DECREF(function);
1811 Py_DECREF(joiner);
1812 return NULL;
1813 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001814 PyTuple_SET_ITEM(args, 0, list);
1815 result = PyObject_CallObject(function, args);
1816 Py_DECREF(args); /* also removes list */
1817 Py_DECREF(function);
1818#else
1819 result = call(
1820 "string", "join",
1821 Py_BuildValue("OO", list, joiner)
1822 );
1823#endif
1824 Py_DECREF(joiner);
1825
1826 return result;
1827}
1828
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001829static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001830pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001831{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001832 SRE_STATE state;
1833 PyObject* list;
1834 int status;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001835 int i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00001836
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001837 PyObject* string;
1838 int start = 0;
1839 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001840 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1841 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1842 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001843 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001845 string = state_init(&state, self, string, start, end);
1846 if (!string)
1847 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001848
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001849 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001850 if (!list) {
1851 state_fini(&state);
1852 return NULL;
1853 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001854
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001855 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001857 PyObject* item;
1858
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001859 state_reset(&state);
1860
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001861 state.ptr = state.start;
1862
1863 if (state.charsize == 1) {
1864 status = sre_search(&state, PatternObject_GetCode(self));
1865 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001866#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001867 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001868#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001869 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001870
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001871 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001872 if (status == 0)
1873 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001874 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001875 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001876 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001877
1878 /* don't bother to build a match object */
1879 switch (self->groups) {
1880 case 0:
1881 b = STATE_OFFSET(&state, state.start);
1882 e = STATE_OFFSET(&state, state.ptr);
1883 item = PySequence_GetSlice(string, b, e);
1884 if (!item)
1885 goto error;
1886 break;
1887 case 1:
1888 item = state_getslice(&state, 1, string, 1);
1889 if (!item)
1890 goto error;
1891 break;
1892 default:
1893 item = PyTuple_New(self->groups);
1894 if (!item)
1895 goto error;
1896 for (i = 0; i < self->groups; i++) {
1897 PyObject* o = state_getslice(&state, i+1, string, 1);
1898 if (!o) {
1899 Py_DECREF(item);
1900 goto error;
1901 }
1902 PyTuple_SET_ITEM(item, i, o);
1903 }
1904 break;
1905 }
1906
1907 status = PyList_Append(list, item);
1908 Py_DECREF(item);
1909 if (status < 0)
1910 goto error;
1911
1912 if (state.ptr == state.start)
1913 state.start = (void*) ((char*) state.ptr + state.charsize);
1914 else
1915 state.start = state.ptr;
1916
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001917 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001918
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001919 state_fini(&state);
1920 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001921
1922error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001923 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001924 state_fini(&state);
1925 return NULL;
1926
Guido van Rossumb700df92000-03-31 14:59:30 +00001927}
1928
#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    /* return an iterator that calls the "search" method of a fresh
       scanner object over and over, until that method returns None */

    PyObject* scanner;
    PyObject* method;
    PyObject* result = NULL;

    scanner = pattern_scanner(pattern, args);
    if (!scanner)
        return NULL;

    /* only the bound method is needed from here on */
    method = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);

    if (method) {
        result = PyCallIter_New(method, Py_None);
        Py_DECREF(method);
    }

    return result;
}
#endif
1952
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001953static PyObject*
1954pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
1955{
1956 SRE_STATE state;
1957 PyObject* list;
1958 PyObject* item;
1959 int status;
1960 int n;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001961 int i;
1962 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001963
1964 PyObject* string;
1965 int maxsplit = 0;
1966 static char* kwlist[] = { "source", "maxsplit", NULL };
1967 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
1968 &string, &maxsplit))
1969 return NULL;
1970
1971 string = state_init(&state, self, string, 0, INT_MAX);
1972 if (!string)
1973 return NULL;
1974
1975 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001976 if (!list) {
1977 state_fini(&state);
1978 return NULL;
1979 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001980
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001981 n = 0;
1982 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001983
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001984 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001985
1986 state_reset(&state);
1987
1988 state.ptr = state.start;
1989
1990 if (state.charsize == 1) {
1991 status = sre_search(&state, PatternObject_GetCode(self));
1992 } else {
1993#if defined(HAVE_UNICODE)
1994 status = sre_usearch(&state, PatternObject_GetCode(self));
1995#endif
1996 }
1997
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001998 if (status <= 0) {
1999 if (status == 0)
2000 break;
2001 pattern_error(status);
2002 goto error;
2003 }
2004
2005 if (state.start == state.ptr) {
2006 if (last == state.end)
2007 break;
2008 /* skip one character */
2009 state.start = (void*) ((char*) state.ptr + state.charsize);
2010 continue;
2011 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002012
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002013 /* get segment before this match */
2014 item = PySequence_GetSlice(
2015 string, STATE_OFFSET(&state, last),
2016 STATE_OFFSET(&state, state.start)
2017 );
2018 if (!item)
2019 goto error;
2020 status = PyList_Append(list, item);
2021 Py_DECREF(item);
2022 if (status < 0)
2023 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002024
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002025 /* add groups (if any) */
2026 for (i = 0; i < self->groups; i++) {
2027 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002028 if (!item)
2029 goto error;
2030 status = PyList_Append(list, item);
2031 Py_DECREF(item);
2032 if (status < 0)
2033 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002034 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002035
2036 n = n + 1;
2037
2038 last = state.start = state.ptr;
2039
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002040 }
2041
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002042 /* get segment following last match (even if empty) */
2043 item = PySequence_GetSlice(
2044 string, STATE_OFFSET(&state, last), state.endpos
2045 );
2046 if (!item)
2047 goto error;
2048 status = PyList_Append(list, item);
2049 Py_DECREF(item);
2050 if (status < 0)
2051 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002052
2053 state_fini(&state);
2054 return list;
2055
2056error:
2057 Py_DECREF(list);
2058 state_fini(&state);
2059 return NULL;
2060
2061}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002062
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002063static PyObject*
2064pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2065 int count, int subn)
2066{
2067 SRE_STATE state;
2068 PyObject* list;
2069 PyObject* item;
2070 PyObject* filter;
2071 PyObject* args;
2072 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002073 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002074 int status;
2075 int n;
2076 int i, b, e;
2077 int filter_is_callable;
2078
Fredrik Lundhdac58492001-10-21 21:48:30 +00002079 if (PyCallable_Check(template)) {
2080 /* sub/subn takes either a function or a template */
2081 filter = template;
2082 Py_INCREF(filter);
2083 filter_is_callable = 1;
2084 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002085 /* if not callable, check if it's a literal string */
2086 int literal;
2087 ptr = getstring(template, &n, &b);
2088 if (ptr) {
2089 if (b == 1) {
2090 literal = sre_literal_template(ptr, n);
2091 } else {
2092#if defined(HAVE_UNICODE)
2093 literal = sre_uliteral_template(ptr, n);
2094#endif
2095 }
2096 } else {
2097 PyErr_Clear();
2098 literal = 0;
2099 }
2100 if (literal) {
2101 filter = template;
2102 Py_INCREF(filter);
2103 filter_is_callable = 0;
2104 } else {
2105 /* not a literal; hand it over to the template compiler */
2106 filter = call(
2107 SRE_MODULE, "_subx",
2108 Py_BuildValue("OO", self, template)
2109 );
2110 if (!filter)
2111 return NULL;
2112 filter_is_callable = PyCallable_Check(filter);
2113 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002114 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002115
2116 string = state_init(&state, self, string, 0, INT_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002117 if (!string) {
2118 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002119 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002120 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002121
2122 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002123 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002124 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002125 state_fini(&state);
2126 return NULL;
2127 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002128
2129 n = i = 0;
2130
2131 while (!count || n < count) {
2132
2133 state_reset(&state);
2134
2135 state.ptr = state.start;
2136
2137 if (state.charsize == 1) {
2138 status = sre_search(&state, PatternObject_GetCode(self));
2139 } else {
2140#if defined(HAVE_UNICODE)
2141 status = sre_usearch(&state, PatternObject_GetCode(self));
2142#endif
2143 }
2144
2145 if (status <= 0) {
2146 if (status == 0)
2147 break;
2148 pattern_error(status);
2149 goto error;
2150 }
2151
2152 b = STATE_OFFSET(&state, state.start);
2153 e = STATE_OFFSET(&state, state.ptr);
2154
2155 if (i < b) {
2156 /* get segment before this match */
2157 item = PySequence_GetSlice(string, i, b);
2158 if (!item)
2159 goto error;
2160 status = PyList_Append(list, item);
2161 Py_DECREF(item);
2162 if (status < 0)
2163 goto error;
2164
2165 } else if (i == b && i == e && n > 0)
2166 /* ignore empty match on latest position */
2167 goto next;
2168
2169 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002170 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002171 match = pattern_new_match(self, &state, 1);
2172 if (!match)
2173 goto error;
2174 args = Py_BuildValue("(O)", match);
2175 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002176 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002177 goto error;
2178 }
2179 item = PyObject_CallObject(filter, args);
2180 Py_DECREF(args);
2181 Py_DECREF(match);
2182 if (!item)
2183 goto error;
2184 } else {
2185 /* filter is literal string */
2186 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002187 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002188 }
2189
2190 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002191 if (item != Py_None) {
2192 status = PyList_Append(list, item);
2193 Py_DECREF(item);
2194 if (status < 0)
2195 goto error;
2196 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002197
2198 i = e;
2199 n = n + 1;
2200
2201next:
2202 /* move on */
2203 if (state.ptr == state.start)
2204 state.start = (void*) ((char*) state.ptr + state.charsize);
2205 else
2206 state.start = state.ptr;
2207
2208 }
2209
2210 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002211 if (i < state.endpos) {
2212 item = PySequence_GetSlice(string, i, state.endpos);
2213 if (!item)
2214 goto error;
2215 status = PyList_Append(list, item);
2216 Py_DECREF(item);
2217 if (status < 0)
2218 goto error;
2219 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002220
2221 state_fini(&state);
2222
Guido van Rossum4e173842001-12-07 04:25:10 +00002223 Py_DECREF(filter);
2224
Fredrik Lundhdac58492001-10-21 21:48:30 +00002225 /* convert list to single string (also removes list) */
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00002226 item = join_list(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002227
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002228 if (!item)
2229 return NULL;
2230
2231 if (subn)
2232 return Py_BuildValue("Ni", item, n);
2233
2234 return item;
2235
2236error:
2237 Py_DECREF(list);
2238 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002239 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002240 return NULL;
2241
2242}
2243
2244static PyObject*
2245pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2246{
2247 PyObject* template;
2248 PyObject* string;
2249 int count = 0;
2250 static char* kwlist[] = { "repl", "string", "count", NULL };
2251 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2252 &template, &string, &count))
2253 return NULL;
2254
2255 return pattern_subx(self, template, string, count, 0);
2256}
2257
2258static PyObject*
2259pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2260{
2261 PyObject* template;
2262 PyObject* string;
2263 int count = 0;
2264 static char* kwlist[] = { "repl", "string", "count", NULL };
2265 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2266 &template, &string, &count))
2267 return NULL;
2268
2269 return pattern_subx(self, template, string, count, 1);
2270}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002271
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002272static PyObject*
2273pattern_copy(PatternObject* self, PyObject* args)
2274{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002275#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002276 PatternObject* copy;
2277 int offset;
2278
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002279 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2280 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002281
2282 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2283 if (!copy)
2284 return NULL;
2285
2286 offset = offsetof(PatternObject, groups);
2287
2288 Py_XINCREF(self->groupindex);
2289 Py_XINCREF(self->indexgroup);
2290 Py_XINCREF(self->pattern);
2291
2292 memcpy((char*) copy + offset, (char*) self + offset,
2293 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2294
2295 return (PyObject*) copy;
2296#else
2297 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2298 return NULL;
2299#endif
2300}
2301
2302static PyObject*
2303pattern_deepcopy(PatternObject* self, PyObject* args)
2304{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002305#ifdef USE_BUILTIN_COPY
2306 PatternObject* copy;
2307
2308 PyObject* memo;
2309 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2310 return NULL;
2311
2312 copy = (PatternObject*) pattern_copy(self, Py_None);
2313 if (!copy)
2314 return NULL;
2315
2316 if (!deepcopy(&copy->groupindex, memo) ||
2317 !deepcopy(&copy->indexgroup, memo) ||
2318 !deepcopy(&copy->pattern, memo)) {
2319 Py_DECREF(copy);
2320 return NULL;
2321 }
2322
2323#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002324 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2325 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002326#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002327}
2328
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002329static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002330 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2331 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2332 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2333 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2334 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2335 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002336#if PY_VERSION_HEX >= 0x02020000
2337 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS},
2338#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002339 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002340 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2341 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002342 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002343};
2344
2345static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002346pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002347{
2348 PyObject* res;
2349
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002350 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002352 if (res)
2353 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002354
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002355 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002356
2357 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002358 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002359 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002360 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002361 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002362
2363 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002364 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002365
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002366 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002367 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002368
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002369 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002370 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002371 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002372 }
2373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002374 PyErr_SetString(PyExc_AttributeError, name);
2375 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002376}
2377
2378statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002379 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002380 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002381 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002382 (destructor)pattern_dealloc, /*tp_dealloc*/
2383 0, /*tp_print*/
2384 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002385};
2386
2387/* -------------------------------------------------------------------- */
2388/* match methods */
2389
2390static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002391match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002392{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002393 Py_XDECREF(self->regs);
2394 Py_XDECREF(self->string);
2395 Py_DECREF(self->pattern);
2396 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002397}
2398
2399static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002400match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002401{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002402 if (index < 0 || index >= self->groups) {
2403 /* raise IndexError if we were given a bad group number */
2404 PyErr_SetString(
2405 PyExc_IndexError,
2406 "no such group"
2407 );
2408 return NULL;
2409 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002410
Fredrik Lundh6f013982000-07-03 18:44:21 +00002411 index *= 2;
2412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002413 if (self->string == Py_None || self->mark[index] < 0) {
2414 /* return default value if the string or group is undefined */
2415 Py_INCREF(def);
2416 return def;
2417 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002418
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002419 return PySequence_GetSlice(
2420 self->string, self->mark[index], self->mark[index+1]
2421 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002422}
2423
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002424static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002425match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002426{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002427 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002429 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002430 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002431
Fredrik Lundh6f013982000-07-03 18:44:21 +00002432 i = -1;
2433
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002434 if (self->pattern->groupindex) {
2435 index = PyObject_GetItem(self->pattern->groupindex, index);
2436 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002437 if (PyInt_Check(index))
2438 i = (int) PyInt_AS_LONG(index);
2439 Py_DECREF(index);
2440 } else
2441 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002442 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002443
2444 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002445}
2446
2447static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002448match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002449{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002450 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002451}
2452
2453static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002454match_expand(MatchObject* self, PyObject* args)
2455{
2456 PyObject* template;
2457 if (!PyArg_ParseTuple(args, "O:expand", &template))
2458 return NULL;
2459
2460 /* delegate to Python code */
2461 return call(
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002462 SRE_MODULE, "_expand",
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002463 Py_BuildValue("OOO", self->pattern, self, template)
2464 );
2465}
2466
2467static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002468match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002469{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002470 PyObject* result;
2471 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002473 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002474
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002475 switch (size) {
2476 case 0:
2477 result = match_getslice(self, Py_False, Py_None);
2478 break;
2479 case 1:
2480 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2481 break;
2482 default:
2483 /* fetch multiple items */
2484 result = PyTuple_New(size);
2485 if (!result)
2486 return NULL;
2487 for (i = 0; i < size; i++) {
2488 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002489 self, PyTuple_GET_ITEM(args, i), Py_None
2490 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002491 if (!item) {
2492 Py_DECREF(result);
2493 return NULL;
2494 }
2495 PyTuple_SET_ITEM(result, i, item);
2496 }
2497 break;
2498 }
2499 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002500}
2501
2502static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002503match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002504{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002505 PyObject* result;
2506 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002507
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002508 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002509 static char* kwlist[] = { "default", NULL };
2510 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002511 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002512
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002513 result = PyTuple_New(self->groups-1);
2514 if (!result)
2515 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002516
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002517 for (index = 1; index < self->groups; index++) {
2518 PyObject* item;
2519 item = match_getslice_by_index(self, index, def);
2520 if (!item) {
2521 Py_DECREF(result);
2522 return NULL;
2523 }
2524 PyTuple_SET_ITEM(result, index-1, item);
2525 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002526
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002527 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002528}
2529
2530static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002531match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002532{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002533 PyObject* result;
2534 PyObject* keys;
2535 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002536
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002537 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002538 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002539 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002540 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002541
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002542 result = PyDict_New();
2543 if (!result || !self->pattern->groupindex)
2544 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002546 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002547 if (!keys)
2548 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002549
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002550 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002551 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002552 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002553 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002554 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002555 if (!key)
2556 goto failed;
2557 value = match_getslice(self, key, def);
2558 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002559 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002560 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002561 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002562 status = PyDict_SetItem(result, key, value);
2563 Py_DECREF(value);
2564 if (status < 0)
2565 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002566 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002567
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002568 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002569
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002570 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002571
2572failed:
2573 Py_DECREF(keys);
2574 Py_DECREF(result);
2575 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002576}
2577
2578static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002579match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002580{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002581 int index;
2582
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002583 PyObject* index_ = Py_False; /* zero */
2584 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2585 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002586
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002587 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002588
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002589 if (index < 0 || index >= self->groups) {
2590 PyErr_SetString(
2591 PyExc_IndexError,
2592 "no such group"
2593 );
2594 return NULL;
2595 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002596
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002597 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002598 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002599}
2600
2601static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002602match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002603{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002604 int index;
2605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002606 PyObject* index_ = Py_False; /* zero */
2607 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2608 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002609
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002610 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002611
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002612 if (index < 0 || index >= self->groups) {
2613 PyErr_SetString(
2614 PyExc_IndexError,
2615 "no such group"
2616 );
2617 return NULL;
2618 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002619
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002620 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002621 return Py_BuildValue("i", self->mark[index*2+1]);
2622}
2623
2624LOCAL(PyObject*)
2625_pair(int i1, int i2)
2626{
2627 PyObject* pair;
2628 PyObject* item;
2629
2630 pair = PyTuple_New(2);
2631 if (!pair)
2632 return NULL;
2633
2634 item = PyInt_FromLong(i1);
2635 if (!item)
2636 goto error;
2637 PyTuple_SET_ITEM(pair, 0, item);
2638
2639 item = PyInt_FromLong(i2);
2640 if (!item)
2641 goto error;
2642 PyTuple_SET_ITEM(pair, 1, item);
2643
2644 return pair;
2645
2646 error:
2647 Py_DECREF(pair);
2648 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002649}
2650
2651static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002652match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002653{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002654 int index;
2655
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002656 PyObject* index_ = Py_False; /* zero */
2657 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2658 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002659
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002660 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002661
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002662 if (index < 0 || index >= self->groups) {
2663 PyErr_SetString(
2664 PyExc_IndexError,
2665 "no such group"
2666 );
2667 return NULL;
2668 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002669
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002670 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002671 return _pair(self->mark[index*2], self->mark[index*2+1]);
2672}
2673
2674static PyObject*
2675match_regs(MatchObject* self)
2676{
2677 PyObject* regs;
2678 PyObject* item;
2679 int index;
2680
2681 regs = PyTuple_New(self->groups);
2682 if (!regs)
2683 return NULL;
2684
2685 for (index = 0; index < self->groups; index++) {
2686 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2687 if (!item) {
2688 Py_DECREF(regs);
2689 return NULL;
2690 }
2691 PyTuple_SET_ITEM(regs, index, item);
2692 }
2693
2694 Py_INCREF(regs);
2695 self->regs = regs;
2696
2697 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002698}
2699
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002700static PyObject*
2701match_copy(MatchObject* self, PyObject* args)
2702{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002703#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002704 MatchObject* copy;
2705 int slots, offset;
2706
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002707 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2708 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002709
2710 slots = 2 * (self->pattern->groups+1);
2711
2712 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2713 if (!copy)
2714 return NULL;
2715
2716 /* this value a constant, but any compiler should be able to
2717 figure that out all by itself */
2718 offset = offsetof(MatchObject, string);
2719
2720 Py_XINCREF(self->pattern);
2721 Py_XINCREF(self->string);
2722 Py_XINCREF(self->regs);
2723
2724 memcpy((char*) copy + offset, (char*) self + offset,
2725 sizeof(MatchObject) + slots * sizeof(int) - offset);
2726
2727 return (PyObject*) copy;
2728#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002729 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002730 return NULL;
2731#endif
2732}
2733
2734static PyObject*
2735match_deepcopy(MatchObject* self, PyObject* args)
2736{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002737#ifdef USE_BUILTIN_COPY
2738 MatchObject* copy;
2739
2740 PyObject* memo;
2741 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2742 return NULL;
2743
2744 copy = (MatchObject*) match_copy(self, Py_None);
2745 if (!copy)
2746 return NULL;
2747
2748 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2749 !deepcopy(&copy->string, memo) ||
2750 !deepcopy(&copy->regs, memo)) {
2751 Py_DECREF(copy);
2752 return NULL;
2753 }
2754
2755#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002756 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2757 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002758#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002759}
2760
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002761static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002762 {"group", (PyCFunction) match_group, METH_VARARGS},
2763 {"start", (PyCFunction) match_start, METH_VARARGS},
2764 {"end", (PyCFunction) match_end, METH_VARARGS},
2765 {"span", (PyCFunction) match_span, METH_VARARGS},
2766 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2767 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2768 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002769 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2770 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002771 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002772};
2773
2774static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002775match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002776{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002777 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002778
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002779 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2780 if (res)
2781 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002782
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002783 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002784
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002785 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002786 if (self->lastindex >= 0)
2787 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002788 Py_INCREF(Py_None);
2789 return Py_None;
2790 }
2791
2792 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002793 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002794 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002795 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002796 );
2797 if (result)
2798 return result;
2799 PyErr_Clear();
2800 }
2801 Py_INCREF(Py_None);
2802 return Py_None;
2803 }
2804
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002805 if (!strcmp(name, "string")) {
2806 if (self->string) {
2807 Py_INCREF(self->string);
2808 return self->string;
2809 } else {
2810 Py_INCREF(Py_None);
2811 return Py_None;
2812 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002813 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002814
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002815 if (!strcmp(name, "regs")) {
2816 if (self->regs) {
2817 Py_INCREF(self->regs);
2818 return self->regs;
2819 } else
2820 return match_regs(self);
2821 }
2822
2823 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002824 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002825 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002826 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002827
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002828 if (!strcmp(name, "pos"))
2829 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002830
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002831 if (!strcmp(name, "endpos"))
2832 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002833
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002834 PyErr_SetString(PyExc_AttributeError, name);
2835 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002836}
2837
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2840
2841statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002842 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002843 0, "_" SRE_MODULE ".SRE_Match",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002844 sizeof(MatchObject), sizeof(int),
2845 (destructor)match_dealloc, /*tp_dealloc*/
2846 0, /*tp_print*/
2847 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002848};
2849
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002850/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002851/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002852
2853static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002854scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002855{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002856 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002857 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002858 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002859}
2860
2861static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002862scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002863{
2864 SRE_STATE* state = &self->state;
2865 PyObject* match;
2866 int status;
2867
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002868 state_reset(state);
2869
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002870 state->ptr = state->start;
2871
2872 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002873 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002874 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002875#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002876 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002877#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002878 }
2879
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002880 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002881 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002882
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00002883 if ((status == 0 || state->ptr == state->start) &&
2884 state->ptr < state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002885 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002886 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002887 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002888
2889 return match;
2890}
2891
2892
2893static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002894scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002895{
2896 SRE_STATE* state = &self->state;
2897 PyObject* match;
2898 int status;
2899
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002900 state_reset(state);
2901
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002902 state->ptr = state->start;
2903
2904 if (state->charsize == 1) {
2905 status = sre_search(state, PatternObject_GetCode(self->pattern));
2906 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002907#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002908 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002909#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002910 }
2911
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002912 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002913 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002914
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00002915 if ((status == 0 || state->ptr == state->start) &&
2916 state->ptr < state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002917 state->start = (void*) ((char*) state->ptr + state->charsize);
2918 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002919 state->start = state->ptr;
2920
2921 return match;
2922}
2923
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002924static PyMethodDef scanner_methods[] = {
Neal Norwitzbb2769f2002-03-31 15:46:00 +00002925 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
2926 /* METH_OLDARGS is not in Python 1.5.2 */
2927 {"match", (PyCFunction) scanner_match, 0},
2928 {"search", (PyCFunction) scanner_search, 0},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002929 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002930};
2931
2932static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002933scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002934{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002935 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002936
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002937 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2938 if (res)
2939 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002940
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002941 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002942
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002943 /* attributes */
2944 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002945 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002946 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002947 }
2948
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002949 PyErr_SetString(PyExc_AttributeError, name);
2950 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002951}
2952
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002953statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002954 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002955 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002956 sizeof(ScannerObject), 0,
2957 (destructor)scanner_dealloc, /*tp_dealloc*/
2958 0, /*tp_print*/
2959 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002960};
2961
Guido van Rossumb700df92000-03-31 14:59:30 +00002962static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002963 {"compile", _compile, METH_VARARGS},
2964 {"getcodesize", sre_codesize, METH_VARARGS},
2965 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002966 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002967};
2968
Mark Hammond8235ea12002-07-19 06:55:41 +00002969PyMODINIT_FUNC init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002970{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002971 PyObject* m;
2972 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002973 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002974
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002975 /* Patch object types */
2976 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002977 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002978
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002979 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002980 d = PyModule_GetDict(m);
2981
Fredrik Lundh21009b92001-09-18 18:47:09 +00002982 x = PyInt_FromLong(SRE_MAGIC);
2983 if (x) {
2984 PyDict_SetItemString(d, "MAGIC", x);
2985 Py_DECREF(x);
2986 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002987
Fredrik Lundh21009b92001-09-18 18:47:09 +00002988 x = PyString_FromString(copyright);
2989 if (x) {
2990 PyDict_SetItemString(d, "copyright", x);
2991 Py_DECREF(x);
2992 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002993}
2994
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002995#endif /* !defined(SRE_RECURSIVE) */