blob: c520b604f40df0c803f278097923abce22f51f6a [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-06-30 fl added fast search optimization
 * 2000-06-30 fl added assert (lookahead) primitives, etc
 * 2000-07-02 fl added charset optimizations, etc
 * 2000-07-03 fl store code in pattern object, lookbehind, etc
 * 2000-07-08 fl added regs attribute
 * 2000-07-21 fl reset lastindex in scanner methods
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-03 fl added recursion limit
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-08-08 fl changed findall to return empty strings instead of None
 * 2000-08-27 fl properly propagate memory errors
 * 2000-09-02 fl return -1 instead of None for start/end/span
 * 2000-09-20 fl added expand method
 * 2000-09-21 fl don't use the buffer interface for unicode strings
 * 2000-10-03 fl fixed assert_not primitive; support keyword arguments
 * 2000-10-24 fl really fixed assert_not; reset groups in findall
 * 2000-12-21 fl fixed memory leak in groupdict
 * 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
 * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
 * 2001-01-16 fl fixed memory leak in pattern destructor
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-22 fl check for literal sub/subn templates
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
49
50#ifndef SRE_RECURSIVE
51
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000052static char copyright[] =
Fredrik Lundhbec95b92001-10-21 16:47:57 +000053 " SRE 2.2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000054
55#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000056#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000057
58#include "sre.h"
59
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000060#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh436c3d52000-06-29 08:58:44 +000062/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000063#if !defined(SRE_MODULE)
64#define SRE_MODULE "sre"
65#endif
Fredrik Lundh436c3d52000-06-29 08:58:44 +000066
Guido van Rossumb700df92000-03-31 14:59:30 +000067/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000069
Fredrik Lundh971e78b2001-10-20 17:48:46 +000070#if PY_VERSION_HEX >= 0x01060000
71#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
Fredrik Lundh22d25462000-07-01 17:50:59 +000072/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +000073#define HAVE_UNICODE
74#endif
Fredrik Lundh971e78b2001-10-20 17:48:46 +000075#endif
Fredrik Lundh436c3d52000-06-29 08:58:44 +000076
/* -------------------------------------------------------------------- */
/* optional features */

/* prevent run-away recursion (bad patterns on long strings) */

/* a fixed recursion limit is only configured when the build does not
   request stack checking through USE_STACKCHECK */
#if !defined(USE_STACKCHECK)
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* require smaller recursion limit for a number of 64-bit platforms:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables aggressive inlining (always on for Visual C) */
#undef USE_INLINE

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

/* before Python 1.6, map PyObject_DEL onto PyMem_DEL */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif
105
/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local function returning `type`, using the
   cheapest calling convention the compiler offers */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (parenthesized so they stay intact inside any expression) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */

/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and expands
   to nothing otherwise; note the double parentheses at the call site */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
130
/* -------------------------------------------------------------------- */
/* search engine state */

/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII. create tables in init_sre() instead */

/* per-character bitmask of the SRE_*_MASK flags above, indexed by the
   7-bit character code (16 entries per row) */
static const char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lowercase mapping for 7-bit character codes: 'A'..'Z' (65..90) map to
   'a'..'z' (97..122), everything else maps to itself */
static const char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* true if ch is in 0..127. testing the bits above the low seven (rather
   than "ch < 128") also rejects negative signed values, which would
   otherwise index in front of the tables */
#define SRE_IS_ASCII(ch) (((ch) & ~0x7F) == 0)

/* each predicate yields the (non-zero) mask bit when the character has
   the property, and 0 otherwise; characters outside 0..127 never match.
   the argument is evaluated more than once */
#define SRE_IS_DIGIT(ch)\
    (SRE_IS_ASCII(ch) ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    (SRE_IS_ASCII(ch) ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    (SRE_IS_ASCII(ch) ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    (SRE_IS_ASCII(ch) ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    (SRE_IS_ASCII(ch) ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000172
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000173static unsigned int sre_lower(unsigned int ch)
174{
175 return ((ch) < 128 ? sre_char_lower[ch] : ch);
176}
177
/* locale-specific character predicates */

/* these defer to the <ctype.h> classifiers for codes below 256 and report
   "no match" (0) for anything larger; only '\n' counts as a line break.
   the argument is evaluated more than once */
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
185
/* Lowercase a character code with the C library's tolower(). Codes of
   256 and above are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return (unsigned int) tolower((int) ch);
    return ch;
}
190
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)

/* thin wrappers over the Py_UNICODE_* classification macros; the argument
   is cast to Py_UNICODE before it is classified */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* Lowercase a character code via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif
207
Guido van Rossumb700df92000-03-31 14:59:30 +0000208LOCAL(int)
209sre_category(SRE_CODE category, unsigned int ch)
210{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000211 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000212
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000213 case SRE_CATEGORY_DIGIT:
214 return SRE_IS_DIGIT(ch);
215 case SRE_CATEGORY_NOT_DIGIT:
216 return !SRE_IS_DIGIT(ch);
217 case SRE_CATEGORY_SPACE:
218 return SRE_IS_SPACE(ch);
219 case SRE_CATEGORY_NOT_SPACE:
220 return !SRE_IS_SPACE(ch);
221 case SRE_CATEGORY_WORD:
222 return SRE_IS_WORD(ch);
223 case SRE_CATEGORY_NOT_WORD:
224 return !SRE_IS_WORD(ch);
225 case SRE_CATEGORY_LINEBREAK:
226 return SRE_IS_LINEBREAK(ch);
227 case SRE_CATEGORY_NOT_LINEBREAK:
228 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000229
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000230 case SRE_CATEGORY_LOC_WORD:
231 return SRE_LOC_IS_WORD(ch);
232 case SRE_CATEGORY_LOC_NOT_WORD:
233 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000234
235#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000236 case SRE_CATEGORY_UNI_DIGIT:
237 return SRE_UNI_IS_DIGIT(ch);
238 case SRE_CATEGORY_UNI_NOT_DIGIT:
239 return !SRE_UNI_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_SPACE:
241 return SRE_UNI_IS_SPACE(ch);
242 case SRE_CATEGORY_UNI_NOT_SPACE:
243 return !SRE_UNI_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_WORD:
245 return SRE_UNI_IS_WORD(ch);
246 case SRE_CATEGORY_UNI_NOT_WORD:
247 return !SRE_UNI_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_LINEBREAK:
249 return SRE_UNI_IS_LINEBREAK(ch);
250 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
251 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000252#else
253 case SRE_CATEGORY_UNI_DIGIT:
254 return SRE_IS_DIGIT(ch);
255 case SRE_CATEGORY_UNI_NOT_DIGIT:
256 return !SRE_IS_DIGIT(ch);
257 case SRE_CATEGORY_UNI_SPACE:
258 return SRE_IS_SPACE(ch);
259 case SRE_CATEGORY_UNI_NOT_SPACE:
260 return !SRE_IS_SPACE(ch);
261 case SRE_CATEGORY_UNI_WORD:
262 return SRE_LOC_IS_WORD(ch);
263 case SRE_CATEGORY_UNI_NOT_WORD:
264 return !SRE_LOC_IS_WORD(ch);
265 case SRE_CATEGORY_UNI_LINEBREAK:
266 return SRE_IS_LINEBREAK(ch);
267 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
268 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000269#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000270 }
271 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000272}
273
274/* helpers */
275
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000276static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000277mark_fini(SRE_STATE* state)
278{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000279 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000280 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000281 state->mark_stack = NULL;
282 }
283 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000284}
285
286static int
287mark_save(SRE_STATE* state, int lo, int hi)
288{
289 void* stack;
290 int size;
291 int minsize, newsize;
292
293 if (hi <= lo)
294 return 0;
295
296 size = (hi - lo) + 1;
297
298 newsize = state->mark_stack_size;
299 minsize = state->mark_stack_base + size;
300
301 if (newsize < minsize) {
302 /* create new stack */
303 if (!newsize) {
304 newsize = 512;
305 if (newsize < minsize)
306 newsize = minsize;
307 TRACE(("allocate stack %d\n", newsize));
308 stack = malloc(sizeof(void*) * newsize);
309 } else {
310 /* grow the stack */
311 while (newsize < minsize)
312 newsize += newsize;
313 TRACE(("grow stack to %d\n", newsize));
314 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
315 }
316 if (!stack) {
317 mark_fini(state);
318 return SRE_ERROR_MEMORY;
319 }
320 state->mark_stack = stack;
321 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000322 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000323
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000324 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000325
326 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
327 size * sizeof(void*));
328
329 state->mark_stack_base += size;
330
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000331 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000332}
333
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000334static int
335mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000336{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000337 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000338
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000339 if (hi <= lo)
340 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000341
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000342 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000344 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000345
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000346 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000347
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000348 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
349 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000350
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000351 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000352}
353
/* generate 8-bit version */

/* the engine below is written against these macro names; each set of
   definitions selects the character type and the function names of one
   instantiation */
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template

#if defined(HAVE_UNICODE)

/* re-include this file with SRE_RECURSIVE set, so that only the engine
   section is compiled, using the 8-bit names defined above */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000391
392#endif /* SRE_RECURSIVE */
393
/* -------------------------------------------------------------------- */
/* String matching engine */

/* the following section is compiled twice, with different character
   settings */
399
400LOCAL(int)
401SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
402{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000403 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000404
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000405 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000410 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000412
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000413 case SRE_AT_BEGINNING_LINE:
414 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000415 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000416
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000417 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000418 return (((void*) (ptr+1) == state->end &&
419 SRE_IS_LINEBREAK((int) ptr[0])) ||
420 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000421
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 case SRE_AT_END_LINE:
423 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000424 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000425
Fredrik Lundh770617b2001-01-14 15:06:11 +0000426 case SRE_AT_END_STRING:
427 return ((void*) ptr == state->end);
428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 case SRE_AT_BOUNDARY:
430 if (state->beginning == state->end)
431 return 0;
432 that = ((void*) ptr > state->beginning) ?
433 SRE_IS_WORD((int) ptr[-1]) : 0;
434 this = ((void*) ptr < state->end) ?
435 SRE_IS_WORD((int) ptr[0]) : 0;
436 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000437
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000438 case SRE_AT_NON_BOUNDARY:
439 if (state->beginning == state->end)
440 return 0;
441 that = ((void*) ptr > state->beginning) ?
442 SRE_IS_WORD((int) ptr[-1]) : 0;
443 this = ((void*) ptr < state->end) ?
444 SRE_IS_WORD((int) ptr[0]) : 0;
445 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000446
447 case SRE_AT_LOC_BOUNDARY:
448 if (state->beginning == state->end)
449 return 0;
450 that = ((void*) ptr > state->beginning) ?
451 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
452 this = ((void*) ptr < state->end) ?
453 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
454 return this != that;
455
456 case SRE_AT_LOC_NON_BOUNDARY:
457 if (state->beginning == state->end)
458 return 0;
459 that = ((void*) ptr > state->beginning) ?
460 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
461 this = ((void*) ptr < state->end) ?
462 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
463 return this == that;
464
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000465#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000466 case SRE_AT_UNI_BOUNDARY:
467 if (state->beginning == state->end)
468 return 0;
469 that = ((void*) ptr > state->beginning) ?
470 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
471 this = ((void*) ptr < state->end) ?
472 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
473 return this != that;
474
475 case SRE_AT_UNI_NON_BOUNDARY:
476 if (state->beginning == state->end)
477 return 0;
478 that = ((void*) ptr > state->beginning) ?
479 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
480 this = ((void*) ptr < state->end) ?
481 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
482 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000483#endif
484
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000485 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000486
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000487 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000488}
489
490LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000491SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000492{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000493 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000494
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000495 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000496
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000497 for (;;) {
498 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000499
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000501 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000502 if (ch == set[0])
503 return ok;
504 set++;
505 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000506
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000507 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000508 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000509 if (set[0] <= ch && ch <= set[1])
510 return ok;
511 set += 2;
512 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000513
Fredrik Lundh3562f112000-07-02 12:00:07 +0000514 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000515 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000516 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
517 return ok;
518 set += 16;
519 break;
520
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000521 case SRE_OP_BIGCHARSET:
522 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
523 {
524 int count, block;
525 count = *(set++);
526 block = ((unsigned char*)set)[ch >> 8];
527 set += 128;
528 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
529 return ok;
530 set += count*16;
531 break;
532 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000533
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000534 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000535 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000536 if (sre_category(set[0], (int) ch))
537 return ok;
538 set += 1;
539 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000540
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000541 case SRE_OP_NEGATE:
542 ok = !ok;
543 break;
544
545 case SRE_OP_FAILURE:
546 return !ok;
547
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000548 default:
549 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000550 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000551 return 0;
552 }
553 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000554}
555
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000556LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
557
558LOCAL(int)
559SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
560{
561 SRE_CODE chr;
562 SRE_CHAR* ptr = state->ptr;
563 SRE_CHAR* end = state->end;
564 int i;
565
566 /* adjust end */
567 if (maxcount < end - ptr && maxcount != 65535)
568 end = ptr + maxcount;
569
570 switch (pattern[0]) {
571
572 case SRE_OP_ANY:
573 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000574 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000575 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
576 ptr++;
577 break;
578
579 case SRE_OP_ANY_ALL:
580 /* repeated dot wildcare. skip to the end of the target
581 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000582 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000583 ptr = end;
584 break;
585
586 case SRE_OP_LITERAL:
587 /* repeated literal */
588 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000589 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000590 while (ptr < end && (SRE_CODE) *ptr == chr)
591 ptr++;
592 break;
593
594 case SRE_OP_LITERAL_IGNORE:
595 /* repeated literal */
596 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000597 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000598 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
599 ptr++;
600 break;
601
602 case SRE_OP_NOT_LITERAL:
603 /* repeated non-literal */
604 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000605 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000606 while (ptr < end && (SRE_CODE) *ptr != chr)
607 ptr++;
608 break;
609
610 case SRE_OP_NOT_LITERAL_IGNORE:
611 /* repeated non-literal */
612 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000613 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000614 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
615 ptr++;
616 break;
617
618 case SRE_OP_IN:
619 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000620 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
621 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000622 ptr++;
623 break;
624
625 default:
626 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000627 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000628 while ((SRE_CHAR*) state->ptr < end) {
629 i = SRE_MATCH(state, pattern, level);
630 if (i < 0)
631 return i;
632 if (!i)
633 break;
634 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000635 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
636 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000637 return (SRE_CHAR*) state->ptr - ptr;
638 }
639
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000640 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000641 return ptr - (SRE_CHAR*) state->ptr;
642}
643
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    int i;

    /* check minimal length: pattern[3] is the minimum number of
       characters required, 0 meaning "no requirement" */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* check known prefix: pattern[5] is the prefix length and the prefix
       characters start at pattern[7] */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) ptr[i] != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000671
672LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000673SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000674{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000675 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 error, 0 for failure, and 1 for success */
    /* Overview (added in review):
       - state:   matching is attempted at state->ptr and never reads at
                  or beyond state->end.  state->ptr is rewritten before
                  every recursive call, and on success it holds the
                  position just after the matched text.
       - pattern: stream of SRE_CODE words.  The main loop fetches one
                  opcode with *pattern++, so inside each case pattern[0]
                  is the first operand of that opcode.
       - level:   recursion depth.  Every recursive call passes
                  level + 1; it is compared with USE_RECURSION_LIMIT and
                  used to throttle the PyOS_CheckStack() probe.
       Negative results produced here are SRE_ERROR_RECURSION_LIMIT,
       SRE_ERROR_STATE and SRE_ERROR_ILLEGAL; negative results from
       recursive calls, SRE_COUNT, mark_save and mark_restore are passed
       through unchanged. */
Guido van Rossumb700df92000-03-31 14:59:30 +0000677
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 SRE_CHAR* end = state->end;
679 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000680 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000681 SRE_REPEAT* rp;
682 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000683 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000685 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000686
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000687 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000688
    /* the stack probe is only made on every tenth recursion level */
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000689#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000690 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000691 return SRE_ERROR_RECURSION_LIMIT;
692#endif
693
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000694#if defined(USE_RECURSION_LIMIT)
695 if (level > USE_RECURSION_LIMIT)
696 return SRE_ERROR_RECURSION_LIMIT;
697#endif
698
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000699 if (pattern[0] == SRE_OP_INFO) {
700 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000701 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000702 if (pattern[3] && (end - ptr) < pattern[3]) {
703 TRACE(("reject (got %d chars, need %d)\n",
704 (end - ptr), pattern[3]));
705 return 0;
706 }
    /* step over the whole info block: the opcode word plus pattern[1]
       further words */
707 pattern += pattern[1] + 1;
708 }
709
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000711
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000712 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000713
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000714 case SRE_OP_FAILURE:
715 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000716 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000717 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000718
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000719 case SRE_OP_SUCCESS:
720 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000721 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
    /* publish the end position of the match to the caller */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000722 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000723 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000724
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000725 case SRE_OP_AT:
726 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000727 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000728 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000730 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000731 pattern++;
732 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000733
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 case SRE_OP_CATEGORY:
735 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000736 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000737 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000739 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000741 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000743
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000744 case SRE_OP_LITERAL:
745 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000746 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000747 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000749 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000750 pattern++;
751 ptr++;
752 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000753
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000754 case SRE_OP_NOT_LITERAL:
755 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000756 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000757 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000758 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000759 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000760 pattern++;
761 ptr++;
762 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000763
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000764 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000765 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000766 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000767 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000768 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
769 return 0;
770 ptr++;
771 break;
772
773 case SRE_OP_ANY_ALL:
774 /* match anything */
775 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000776 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000777 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000778 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000779 ptr++;
780 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000781
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000782 case SRE_OP_IN:
783 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000784 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000785 TRACE(("|%p|%p|IN\n", pattern, ptr));
786 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000787 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000788 pattern += pattern[0];
789 ptr++;
790 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000791
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000792 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000794 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000795 i = pattern[0];
796 {
    /* mark[2*i] and mark[2*i+1] delimit the text to compare
       against; a missing mark or an inverted pair fails the match */
797 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
798 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
799 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000800 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000801 while (p < e) {
802 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000803 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 p++; ptr++;
805 }
806 }
807 pattern++;
808 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000809
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000810 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000811 /* match backreference */
    /* same as GROUPREF, but both sides are passed through
       state->lower before being compared */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000812 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000813 i = pattern[0];
814 {
815 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
816 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
817 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000818 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000819 while (p < e) {
820 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000821 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000822 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000823 p++; ptr++;
824 }
825 }
826 pattern++;
827 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000828
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000829 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000830 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000831 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000832 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000833 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000834 pattern++;
835 ptr++;
836 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000837
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000838 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000839 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000840 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000841 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000842 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000843 pattern++;
844 ptr++;
845 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000846
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000847 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000848 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000849 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000850 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000851 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000852 pattern += pattern[0];
853 ptr++;
854 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000855
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000856 case SRE_OP_MARK:
857 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000858 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000859 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000860 i = pattern[0];
    /* an odd mark number also updates lastindex (to i/2 + 1);
       lastmark tracks the highest mark number stored so far */
861 if (i & 1)
862 state->lastindex = i/2 + 1;
863 if (i > state->lastmark)
864 state->lastmark = i;
865 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000866 pattern++;
867 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000868
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000869 case SRE_OP_JUMP:
870 case SRE_OP_INFO:
871 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000872 /* <JUMP> <offset> */
    /* an INFO block met inside the loop is handled exactly like
       a JUMP: its first operand is used as the forward offset */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000873 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000874 pattern += pattern[0];
875 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000876
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000877 case SRE_OP_ASSERT:
878 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000879 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000880 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
    /* the subpattern is tried pattern[1] characters before the
       current position; the assertion fails if that would start
       before state->beginning.  the local ptr is left untouched,
       so a successful assertion consumes no text */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000881 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000882 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000883 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000884 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000885 if (i <= 0)
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000886 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000887 pattern += pattern[0];
888 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000889
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000890 case SRE_OP_ASSERT_NOT:
891 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000892 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000893 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000894 state->ptr = ptr - pattern[1];
    /* when stepping back would start before state->beginning
       the subpattern is not tried at all and the negative
       assertion holds; otherwise a subpattern match fails it */
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000895 if (state->ptr >= state->beginning) {
896 i = SRE_MATCH(state, pattern + 2, level + 1);
897 if (i < 0)
898 return i;
899 if (i)
900 return 0;
901 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000902 pattern += pattern[0];
903 break;
904
905 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000906 /* alternation */
907 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000908 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000909 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000910 for (; pattern[0]; pattern += pattern[0]) {
    /* cheap pre-check: skip an alternative whose first opcode
       is LITERAL or IN when the current character cannot
       satisfy it, avoiding a recursive call */
911 if (pattern[1] == SRE_OP_LITERAL &&
912 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
913 continue;
914 if (pattern[1] == SRE_OP_IN &&
915 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
916 continue;
917 state->ptr = ptr;
918 i = SRE_MATCH(state, pattern + 1, level + 1);
919 if (i)
920 return i;
    /* the alternative failed: clear every mark it stored above
       the level recorded on entry to the branch */
921 if (state->lastmark > lastmark) {
922 memset(
923 state->mark + lastmark + 1, 0,
924 (state->lastmark - lastmark) * sizeof(void*)
925 );
926 state->lastmark = lastmark;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000927 }
928 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000929 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000930
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000931 case SRE_OP_REPEAT_ONE:
932 /* match repeated sequence (maximizing regexp) */
933
934 /* this operator only works if the repeated item is
935 exactly one character wide, and we're not already
936 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000937 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000938
939 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
940
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000941 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000942 pattern[1], pattern[2]));
943
    /* fewer than <min> characters remain before end */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000944 if (ptr + pattern[1] > end)
945 return 0; /* cannot match */
946
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000947 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000948
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000949 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
950 if (count < 0)
951 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000952
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000953 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000954
955 /* when we arrive here, count contains the number of
956 matches, and ptr points to the tail of the target
957 string. check if the rest of the pattern matches,
958 and backtrack if not. */
959
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000960 if (count < (int) pattern[1])
961 return 0;
962
963 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
964 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000965 state->ptr = ptr;
966 return 1;
967
968 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
969 /* tail starts with a literal. skip positions where
970 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000971 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000972 for (;;) {
    /* give back repetitions one at a time until the
       character at ptr equals the literal that starts
       the tail, or count drops below <min> */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000973 while (count >= (int) pattern[1] &&
974 (ptr >= end || *ptr != chr)) {
975 ptr--;
976 count--;
977 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000978 if (count < (int) pattern[1])
979 break;
980 state->ptr = ptr;
981 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000982 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000983 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000984 ptr--;
985 count--;
986 }
987
988 } else {
989 /* general case */
    /* try the tail after every repetition count from the
       largest down to <min>, clearing marks stored by each
       failed attempt */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000990 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000991 while (count >= (int) pattern[1]) {
992 state->ptr = ptr;
993 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000994 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000995 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000996 ptr--;
997 count--;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000998 if (state->lastmark > lastmark) {
999 memset(
1000 state->mark + lastmark + 1, 0,
1001 (state->lastmark - lastmark) * sizeof(void*)
1002 );
1003 state->lastmark = lastmark;
1004 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001005 }
1006 }
1007 return 0;
1008
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001009 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001010 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001011 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001012 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001013 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001014 pattern[1], pattern[2]));
1015
    /* count starts at -1 because the UNTIL operators compute
       rp->count + 1 on arrival, which gives 0 the first time.
       rep is a local of this call: it is linked into
       state->repeat only for the duration of the recursive call
       below and unlinked again before returning */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001016 rep.count = -1;
1017 rep.pattern = pattern;
1018
1019 /* install new repeat context */
1020 rep.prev = state->repeat;
1021 state->repeat = &rep;
1022
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001023 state->ptr = ptr;
1024 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001025
1026 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001027
1028 return i;
1029
1030 case SRE_OP_MAX_UNTIL:
1031 /* maximizing repeat */
1032 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1033
1034 /* FIXME: we probably need to deal with zero-width
1035 matches in here... */
1036
    /* an UNTIL without an active repeat context is reported as
       a state error */
1037 rp = state->repeat;
1038 if (!rp)
1039 return SRE_ERROR_STATE;
1040
1041 state->ptr = ptr;
1042
1043 count = rp->count + 1;
1044
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001045 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001046
1047 if (count < rp->pattern[1]) {
1048 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001049 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001050 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001051 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001052 if (i)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001053 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001054 rp->count = count - 1;
1055 state->ptr = ptr;
1056 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001057 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001058
    /* a <max> of 65535 is treated as "no upper bound" */
1059 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001060 /* we may have enough matches, but if we can
1061 match another item, do so */
1062 rp->count = count;
1063 lastmark = state->lastmark;
    /* NOTE(review): mark_save and mark_restore are defined
       outside this function; presumably they snapshot and
       restore state->mark[0..lastmark] around the attempt */
Fredrik Lundh33accc12000-08-27 20:59:47 +00001064 i = mark_save(state, 0, lastmark);
1065 if (i < 0)
1066 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001067 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001068 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001069 if (i)
1070 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001071 i = mark_restore(state, 0, lastmark);
Fredrik Lundh397a6542001-10-18 19:30:16 +00001072 state->lastmark = lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001073 if (i < 0)
1074 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001075 rp->count = count - 1;
1076 state->ptr = ptr;
1077 }
1078
1079 /* cannot match more repeated items here. make sure the
1080 tail matches */
    /* the tail runs with the enclosing repeat context active;
       this context is reinstated if the tail fails */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001081 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001082 i = SRE_MATCH(state, pattern, level + 1);
1083 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001084 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001085 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001086 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001087 return 0;
1088
1089 case SRE_OP_MIN_UNTIL:
1090 /* minimizing repeat */
1091 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1092
1093 rp = state->repeat;
1094 if (!rp)
1095 return SRE_ERROR_STATE;
1096
1097 count = rp->count + 1;
1098
Fredrik Lundh770617b2001-01-14 15:06:11 +00001099 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1100 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001101
1102 state->ptr = ptr;
1103
1104 if (count < rp->pattern[1]) {
1105 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001106 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001107 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001108 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001109 if (i)
1110 return i;
1111 rp->count = count-1;
1112 state->ptr = ptr;
1113 return 0;
1114 }
1115
1116 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001117 state->repeat = rp->prev;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +00001118 /* FIXME: the following fix doesn't always work (#133283) */
    /* for an unbounded repeat the tail is retried at successive
       positions without recursing into the item: ptr advances
       one character per failed attempt and stops at end.  note
       that ptr stays advanced for the code after the loop */
Fredrik Lundhdf781e62001-07-02 19:54:28 +00001119 if (rp->pattern[2] == 65535) {
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001120 /* unbounded repeat */
1121 for (;;) {
1122 i = SRE_MATCH(state, pattern, level + 1);
1123 if (i || ptr >= end)
1124 break;
1125 state->ptr = ++ptr;
1126 }
1127 } else
1128 i = SRE_MATCH(state, pattern, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001129 if (i) {
1130 /* free(rp); */
1131 return i;
1132 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001133
Fredrik Lundh770617b2001-01-14 15:06:11 +00001134 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001135 state->repeat = rp;
1136
    /* the tail failed; another item may only be matched while
       the bounded <max> has not been reached */
1137 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1138 return 0;
1139
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001140 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001141 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001142 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001143 if (i)
1144 return i;
1145 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001146 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001147 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001148
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001149 default:
    /* pattern was already advanced by the switch, so
       pattern[-1] is the unrecognized opcode */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001150 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001151 return SRE_ERROR_ILLEGAL;
1152 }
1153 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001154
Sjoerd Mullender89dfe9e2001-08-30 14:37:07 +00001155 /* can't end up here */
Fredrik Lundh21009b92001-09-18 18:47:09 +00001156 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
Guido van Rossumb700df92000-03-31 14:59:30 +00001157}
1158
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001159LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001160SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1161{
    /* Scan forward from state->start for a position where pattern
       matches.  Each candidate position is stored in state->start
       before SRE_MATCH is called (at recursion level 1).  Returns 0
       when no candidate matched, 1 on success, or the negative status
       of the SRE_MATCH call that reported an error.

       An optional leading INFO block selects the scanning strategy:
       a known prefix (with USE_FAST_SEARCH), a leading literal, a
       leading character set, or a plain position-by-position scan. */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001162 SRE_CHAR* ptr = state->start;
1163 SRE_CHAR* end = state->end;
1164 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001165 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001166 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001167 SRE_CODE* prefix = NULL;
1168 SRE_CODE* charset = NULL;
1169 SRE_CODE* overlap = NULL;
1170 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001171
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001172 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001173 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001174 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001175
1176 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001177
    /* note: only the general scan at the bottom uses this shortened
       end; the prefix, literal and charset scans reset end to
       state->end before looping */
1178 if (pattern[3] > 0) {
1179 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001180 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001181 end -= pattern[3]-1;
1182 if (end <= ptr)
1183 end = ptr+1;
1184 }
1185
Fredrik Lundh3562f112000-07-02 12:00:07 +00001186 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001187 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001188 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001189 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001190 prefix_skip = pattern[6];
1191 prefix = pattern + 7;
    /* the overlap data follows the prefix data; the pointer is
       biased by one so the table is indexed from 1 (the scan below
       only reads overlap[i] with i >= 1) */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001192 overlap = prefix + prefix_len - 1;
1193 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001194 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001195 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001196 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001197
    /* continue with the code that follows the info block */
1198 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001199 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001200
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001201 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1202 TRACE(("charset = %p\n", charset));
1203
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001204#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001205 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001206 /* pattern starts with a known prefix. use the overlap
1207 table to skip forward as fast as we possibly can */
    /* i is the number of prefix characters matched so far */
1208 int i = 0;
1209 end = state->end;
1210 while (ptr < end) {
1211 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001212 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001213 if (!i)
1214 break;
1215 else
1216 i = overlap[i];
1217 } else {
1218 if (++i == prefix_len) {
1219 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001220 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
    /* ptr is on the last prefix character: the candidate starts
       prefix_len - 1 characters earlier, and matching resumes
       prefix_skip characters into it */
1221 state->start = ptr + 1 - prefix_len;
1222 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001223 if (flags & SRE_INFO_LITERAL)
1224 return 1; /* we got all of it */
    /* NOTE(review): the 2*prefix_skip offset presumably steps over
       one two-word <LITERAL> <code> pair per skipped prefix
       character -- confirm against the compiler */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001225 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001226 if (status != 0)
1227 return status;
1228 /* close but no cigar -- try again */
1229 i = overlap[i];
1230 }
1231 break;
1232 }
1233
1234 }
1235 ptr++;
1236 }
1237 return 0;
1238 }
1239#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001240
Fredrik Lundh3562f112000-07-02 12:00:07 +00001241 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001242 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001243 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001244 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001245 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001246 for (;;) {
1247 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1248 ptr++;
1249 if (ptr == end)
1250 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001251 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
    /* the leading literal has been matched here, so matching
       resumes one character further on and past the two-word
       <LITERAL> <code> pair; ++ptr also moves the scan on for the
       next attempt */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001252 state->start = ptr;
1253 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001254 if (flags & SRE_INFO_LITERAL)
1255 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001256 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001257 if (status != 0)
1258 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001259 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001260 } else if (charset) {
1261 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001262 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001263 for (;;) {
1264 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001265 ptr++;
1266 if (ptr == end)
1267 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001268 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
    /* unlike the literal case, the set member is not consumed here:
       the full pattern is matched from the candidate position */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001269 state->start = ptr;
1270 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001271 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001272 if (status != 0)
1273 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001274 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001275 }
1276 } else
1277 /* general case */
    /* the loop bound is inclusive, so a match is also attempted at
       the end position itself */
1278 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001279 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001280 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001281 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001282 if (status != 0)
1283 break;
1284 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001285
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001286 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001287}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001288
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001289LOCAL(int)
1290SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1291{
1292 /* check if given string is a literal template (i.e. no escapes) */
1293 while (len-- > 0)
1294 if (*ptr++ == '\\')
1295 return 0;
1296 return 1;
1297}
Guido van Rossumb700df92000-03-31 14:59:30 +00001298
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001299#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001300
1301/* -------------------------------------------------------------------- */
1302/* factories and destructors */
1303
1304/* see sre.h for object declarations */
1305
1306staticforward PyTypeObject Pattern_Type;
1307staticforward PyTypeObject Match_Type;
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001308staticforward PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001309
1310static PyObject *
1311_compile(PyObject* self_, PyObject* args)
1312{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001313 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001314
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001315 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001316 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001317
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001318 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001319 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001320 PyObject* code;
1321 int groups = 0;
1322 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001323 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001324 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1325 &PyList_Type, &code, &groups,
1326 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001327 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001328
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001329 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001330
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001331 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001332 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001333 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001334
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001335 self->codesize = n;
1336
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001337 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001338 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001339 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001340 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001341
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001342 if (PyErr_Occurred()) {
1343 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001344 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001345 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001346
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001347 Py_INCREF(pattern);
1348 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001349
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001350 self->flags = flags;
1351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001352 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001353
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001354 Py_XINCREF(groupindex);
1355 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 Py_XINCREF(indexgroup);
1358 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001359
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001360 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001361}
1362
1363static PyObject *
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001364sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001365{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001366 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001367}
1368
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001369static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001370sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001371{
1372 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001373 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001374 return NULL;
1375 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001376 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001377 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001378#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001379 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001380#else
1381 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001382#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001383 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001384}
1385
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001386LOCAL(void)
1387state_reset(SRE_STATE* state)
1388{
1389 int i;
1390
1391 state->lastmark = 0;
1392
1393 /* FIXME: dynamic! */
1394 for (i = 0; i < SRE_MARK_SIZE; i++)
1395 state->mark[i] = NULL;
1396
1397 state->lastindex = -1;
1398
1399 state->repeat = NULL;
1400
1401 mark_fini(state);
1402}
1403
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001404static void*
1405getstring(PyObject* string, int* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001406{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001407 /* given a python object, return a data pointer, a length (in
1408 characters), and a character size. return NULL if the object
1409 is not a string (or not compatible) */
1410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001411 PyBufferProcs *buffer;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001412 int size, bytes, charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001413 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001414
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001415#if defined(HAVE_UNICODE)
1416 if (PyUnicode_Check(string)) {
1417 /* unicode strings doesn't always support the buffer interface */
1418 ptr = (void*) PyUnicode_AS_DATA(string);
1419 bytes = PyUnicode_GET_DATA_SIZE(string);
1420 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001421 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001422
1423 } else {
1424#endif
1425
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001426 /* get pointer to string buffer */
1427 buffer = string->ob_type->tp_as_buffer;
1428 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1429 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001430 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001431 return NULL;
1432 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001433
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001434 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001435 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1436 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001437 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1438 return NULL;
1439 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001440
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001441 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001442#if PY_VERSION_HEX >= 0x01060000
1443 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001444#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001445 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001446#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001447
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001448 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001449 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001450#if defined(HAVE_UNICODE)
1451 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001452 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001453#endif
1454 else {
1455 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1456 return NULL;
1457 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001458
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001459#if defined(HAVE_UNICODE)
1460 }
1461#endif
1462
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001463 *p_length = size;
1464 *p_charsize = charsize;
1465
1466 return ptr;
1467}
1468
1469LOCAL(PyObject*)
1470state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1471 int start, int end)
1472{
1473 /* prepare state object */
1474
1475 int length;
1476 int charsize;
1477 void* ptr;
1478
1479 memset(state, 0, sizeof(SRE_STATE));
1480
1481 state->lastindex = -1;
1482
1483 ptr = getstring(string, &length, &charsize);
1484 if (!ptr)
1485 return NULL;
1486
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001487 /* adjust boundaries */
1488 if (start < 0)
1489 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001490 else if (start > length)
1491 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001492
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001493 if (end < 0)
1494 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001495 else if (end > length)
1496 end = length;
1497
1498 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001499
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001500 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001501
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001502 state->start = (void*) ((char*) ptr + start * state->charsize);
1503 state->end = (void*) ((char*) ptr + end * state->charsize);
1504
1505 Py_INCREF(string);
1506 state->string = string;
1507 state->pos = start;
1508 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001509
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001510 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001511 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001512 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001513#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001514 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001515#else
1516 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001517#endif
1518 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001519 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001520
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001521 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001522}
1523
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001524LOCAL(void)
1525state_fini(SRE_STATE* state)
1526{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001527 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001528 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001529}
1530
/* calculate offset from start of string: the distance in bytes between
   `member` and (state)->beginning, divided by (state)->charsize.  both
   macro arguments are evaluated more than once or as plain expressions,
   so pass side-effect free operands. */
#define STATE_OFFSET(state, member) \
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1534
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001535LOCAL(PyObject*)
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001536state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001537{
Fredrik Lundh58100642000-08-09 09:14:35 +00001538 int i, j;
1539
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001540 index = (index - 1) * 2;
1541
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001542 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001543 if (empty)
1544 /* want empty string */
1545 i = j = 0;
1546 else {
1547 Py_INCREF(Py_None);
1548 return Py_None;
1549 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001550 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001551 i = STATE_OFFSET(state, state->mark[index]);
1552 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001553 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001554
Fredrik Lundh58100642000-08-09 09:14:35 +00001555 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001556}
1557
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001558static void
1559pattern_error(int status)
1560{
1561 switch (status) {
1562 case SRE_ERROR_RECURSION_LIMIT:
1563 PyErr_SetString(
1564 PyExc_RuntimeError,
1565 "maximum recursion limit exceeded"
1566 );
1567 break;
1568 case SRE_ERROR_MEMORY:
1569 PyErr_NoMemory();
1570 break;
1571 default:
1572 /* other error codes indicate compiler/engine bugs */
1573 PyErr_SetString(
1574 PyExc_RuntimeError,
1575 "internal error in regular expression engine"
1576 );
1577 }
1578}
1579
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001580static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001581pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001582{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001583 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001584
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001585 MatchObject* match;
1586 int i, j;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001587 char* base;
1588 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001589
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001590 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001591
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001592 /* create match object (with room for extra group marks) */
1593 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001594 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001595 if (!match)
1596 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001597
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001598 Py_INCREF(pattern);
1599 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001601 Py_INCREF(state->string);
1602 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001603
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001604 match->regs = NULL;
1605 match->groups = pattern->groups+1;
1606
1607 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001608
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001609 base = (char*) state->beginning;
1610 n = state->charsize;
1611
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001612 match->mark[0] = ((char*) state->start - base) / n;
1613 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001614
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001615 for (i = j = 0; i < pattern->groups; i++, j+=2)
1616 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1617 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1618 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1619 } else
1620 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1621
1622 match->pos = state->pos;
1623 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001624
Fredrik Lundh6f013982000-07-03 18:44:21 +00001625 match->lastindex = state->lastindex;
1626
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001627 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001628
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001629 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001630
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001631 /* no match */
1632 Py_INCREF(Py_None);
1633 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001634
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001635 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001636
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001637 /* internal error */
1638 pattern_error(status);
1639 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001640}
1641
1642static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001643pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001644{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001645 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001646
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001647 ScannerObject* self;
1648
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001649 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001650 int start = 0;
1651 int end = INT_MAX;
1652 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1653 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001654
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001655 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001656 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001657 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001658 return NULL;
1659
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001660 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001661 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001662 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001663 return NULL;
1664 }
1665
1666 Py_INCREF(pattern);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001667 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001668
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001669 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001670}
1671
Guido van Rossumb700df92000-03-31 14:59:30 +00001672static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001673pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001674{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001675 Py_XDECREF(self->pattern);
1676 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001677 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001678 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001679}
1680
1681static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001682pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001683{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001684 SRE_STATE state;
1685 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001686
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001687 PyObject* string;
1688 int start = 0;
1689 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001690 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1691 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1692 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001694
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001695 string = state_init(&state, self, string, start, end);
1696 if (!string)
1697 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001698
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001699 state.ptr = state.start;
1700
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001701 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1702
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001703 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001704 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001705 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001706#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001707 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001708#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001709 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001710
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001711 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1712
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001713 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001714
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001715 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001716}
1717
1718static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001719pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001720{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001721 SRE_STATE state;
1722 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001723
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001724 PyObject* string;
1725 int start = 0;
1726 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001727 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1728 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1729 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001730 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001731
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001732 string = state_init(&state, self, string, start, end);
1733 if (!string)
1734 return NULL;
1735
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001736 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1737
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001738 if (state.charsize == 1) {
1739 status = sre_search(&state, PatternObject_GetCode(self));
1740 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001741#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001742 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001743#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001744 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001745
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001746 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1747
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001748 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001749
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001750 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001751}
1752
1753static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001754call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001755{
1756 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001757 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001758 PyObject* func;
1759 PyObject* result;
1760
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001761 if (!args)
1762 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001763 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001764 if (!name)
1765 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001766 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001767 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001768 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001769 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001770 func = PyObject_GetAttrString(mod, function);
1771 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001772 if (!func)
1773 return NULL;
1774 result = PyObject_CallObject(func, args);
1775 Py_DECREF(func);
1776 Py_DECREF(args);
1777 return result;
1778}
1779
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object by the result of copy.deepcopy(*object, memo).

       returns 1 on success, after releasing the old reference and
       storing the copy in its place; returns 0 on failure, leaving
       *object unchanged. */

    PyObject* replacement = call(
        "copy", "deepcopy",
        Py_BuildValue("OO", *object, memo)
        );

    if (replacement) {
        Py_DECREF(*object);
        *object = replacement;
        return 1; /* success */
    }

    return 0;
}
#endif
1799
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001800static PyObject*
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001801join(PyObject* list, PyObject* pattern)
1802{
1803 /* join list elements */
1804
1805 PyObject* joiner;
1806#if PY_VERSION_HEX >= 0x01060000
1807 PyObject* function;
1808 PyObject* args;
1809#endif
1810 PyObject* result;
1811
1812 switch (PyList_GET_SIZE(list)) {
1813 case 0:
1814 Py_DECREF(list);
1815 return PyString_FromString("");
1816 case 1:
1817 result = PyList_GET_ITEM(list, 0);
1818 Py_INCREF(result);
1819 Py_DECREF(list);
1820 return result;
1821 }
1822
1823 /* two or more elements: slice out a suitable separator from the
1824 first member, and use that to join the entire list */
1825
1826 joiner = PySequence_GetSlice(pattern, 0, 0);
1827 if (!joiner)
1828 return NULL;
1829
1830#if PY_VERSION_HEX >= 0x01060000
1831 function = PyObject_GetAttrString(joiner, "join");
1832 if (!function) {
1833 Py_DECREF(joiner);
1834 return NULL;
1835 }
1836 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001837 if (!args) {
1838 Py_DECREF(function);
1839 Py_DECREF(joiner);
1840 return NULL;
1841 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001842 PyTuple_SET_ITEM(args, 0, list);
1843 result = PyObject_CallObject(function, args);
1844 Py_DECREF(args); /* also removes list */
1845 Py_DECREF(function);
1846#else
1847 result = call(
1848 "string", "join",
1849 Py_BuildValue("OO", list, joiner)
1850 );
1851#endif
1852 Py_DECREF(joiner);
1853
1854 return result;
1855}
1856
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001857static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001858pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001859{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001860 SRE_STATE state;
1861 PyObject* list;
1862 int status;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001863 int i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00001864
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001865 PyObject* string;
1866 int start = 0;
1867 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001868 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1869 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1870 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001871 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001872
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001873 string = state_init(&state, self, string, start, end);
1874 if (!string)
1875 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001876
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001877 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001878 if (!list) {
1879 state_fini(&state);
1880 return NULL;
1881 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001882
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001883 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001884
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001885 PyObject* item;
1886
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001887 state_reset(&state);
1888
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001889 state.ptr = state.start;
1890
1891 if (state.charsize == 1) {
1892 status = sre_search(&state, PatternObject_GetCode(self));
1893 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001894#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001895 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001896#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001897 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001898
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001899 if (status <= 0) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001900 if (status == 0)
1901 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001902 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001903 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001904 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001905
1906 /* don't bother to build a match object */
1907 switch (self->groups) {
1908 case 0:
1909 b = STATE_OFFSET(&state, state.start);
1910 e = STATE_OFFSET(&state, state.ptr);
1911 item = PySequence_GetSlice(string, b, e);
1912 if (!item)
1913 goto error;
1914 break;
1915 case 1:
1916 item = state_getslice(&state, 1, string, 1);
1917 if (!item)
1918 goto error;
1919 break;
1920 default:
1921 item = PyTuple_New(self->groups);
1922 if (!item)
1923 goto error;
1924 for (i = 0; i < self->groups; i++) {
1925 PyObject* o = state_getslice(&state, i+1, string, 1);
1926 if (!o) {
1927 Py_DECREF(item);
1928 goto error;
1929 }
1930 PyTuple_SET_ITEM(item, i, o);
1931 }
1932 break;
1933 }
1934
1935 status = PyList_Append(list, item);
1936 Py_DECREF(item);
1937 if (status < 0)
1938 goto error;
1939
1940 if (state.ptr == state.start)
1941 state.start = (void*) ((char*) state.ptr + state.charsize);
1942 else
1943 state.start = state.ptr;
1944
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001945 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001946
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001947 state_fini(&state);
1948 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001949
1950error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001951 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001952 state_fini(&state);
1953 return NULL;
1954
Guido van Rossumb700df92000-03-31 14:59:30 +00001955}
1956
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001957static PyObject*
1958pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
1959{
1960 SRE_STATE state;
1961 PyObject* list;
1962 PyObject* item;
1963 int status;
1964 int n;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001965 int i;
1966 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001967
1968 PyObject* string;
1969 int maxsplit = 0;
1970 static char* kwlist[] = { "source", "maxsplit", NULL };
1971 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
1972 &string, &maxsplit))
1973 return NULL;
1974
1975 string = state_init(&state, self, string, 0, INT_MAX);
1976 if (!string)
1977 return NULL;
1978
1979 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001980 if (!list) {
1981 state_fini(&state);
1982 return NULL;
1983 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001984
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001985 n = 0;
1986 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001987
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001988 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001989
1990 state_reset(&state);
1991
1992 state.ptr = state.start;
1993
1994 if (state.charsize == 1) {
1995 status = sre_search(&state, PatternObject_GetCode(self));
1996 } else {
1997#if defined(HAVE_UNICODE)
1998 status = sre_usearch(&state, PatternObject_GetCode(self));
1999#endif
2000 }
2001
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002002 if (status <= 0) {
2003 if (status == 0)
2004 break;
2005 pattern_error(status);
2006 goto error;
2007 }
2008
2009 if (state.start == state.ptr) {
2010 if (last == state.end)
2011 break;
2012 /* skip one character */
2013 state.start = (void*) ((char*) state.ptr + state.charsize);
2014 continue;
2015 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002016
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002017 /* get segment before this match */
2018 item = PySequence_GetSlice(
2019 string, STATE_OFFSET(&state, last),
2020 STATE_OFFSET(&state, state.start)
2021 );
2022 if (!item)
2023 goto error;
2024 status = PyList_Append(list, item);
2025 Py_DECREF(item);
2026 if (status < 0)
2027 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002028
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002029 /* add groups (if any) */
2030 for (i = 0; i < self->groups; i++) {
2031 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002032 if (!item)
2033 goto error;
2034 status = PyList_Append(list, item);
2035 Py_DECREF(item);
2036 if (status < 0)
2037 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002038 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002039
2040 n = n + 1;
2041
2042 last = state.start = state.ptr;
2043
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002044 }
2045
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002046 /* get segment following last match (even if empty) */
2047 item = PySequence_GetSlice(
2048 string, STATE_OFFSET(&state, last), state.endpos
2049 );
2050 if (!item)
2051 goto error;
2052 status = PyList_Append(list, item);
2053 Py_DECREF(item);
2054 if (status < 0)
2055 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002056
2057 state_fini(&state);
2058 return list;
2059
2060error:
2061 Py_DECREF(list);
2062 state_fini(&state);
2063 return NULL;
2064
2065}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002066
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002067static PyObject*
2068pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2069 int count, int subn)
2070{
2071 SRE_STATE state;
2072 PyObject* list;
2073 PyObject* item;
2074 PyObject* filter;
2075 PyObject* args;
2076 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002077 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002078 int status;
2079 int n;
2080 int i, b, e;
2081 int filter_is_callable;
2082
Fredrik Lundhdac58492001-10-21 21:48:30 +00002083 if (PyCallable_Check(template)) {
2084 /* sub/subn takes either a function or a template */
2085 filter = template;
2086 Py_INCREF(filter);
2087 filter_is_callable = 1;
2088 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002089 /* if not callable, check if it's a literal string */
2090 int literal;
2091 ptr = getstring(template, &n, &b);
2092 if (ptr) {
2093 if (b == 1) {
2094 literal = sre_literal_template(ptr, n);
2095 } else {
2096#if defined(HAVE_UNICODE)
2097 literal = sre_uliteral_template(ptr, n);
2098#endif
2099 }
2100 } else {
2101 PyErr_Clear();
2102 literal = 0;
2103 }
2104 if (literal) {
2105 filter = template;
2106 Py_INCREF(filter);
2107 filter_is_callable = 0;
2108 } else {
2109 /* not a literal; hand it over to the template compiler */
2110 filter = call(
2111 SRE_MODULE, "_subx",
2112 Py_BuildValue("OO", self, template)
2113 );
2114 if (!filter)
2115 return NULL;
2116 filter_is_callable = PyCallable_Check(filter);
2117 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002118 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002119
2120 string = state_init(&state, self, string, 0, INT_MAX);
2121 if (!string)
2122 return NULL;
2123
2124 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002125 if (!list) {
2126 state_fini(&state);
2127 return NULL;
2128 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002129
2130 n = i = 0;
2131
2132 while (!count || n < count) {
2133
2134 state_reset(&state);
2135
2136 state.ptr = state.start;
2137
2138 if (state.charsize == 1) {
2139 status = sre_search(&state, PatternObject_GetCode(self));
2140 } else {
2141#if defined(HAVE_UNICODE)
2142 status = sre_usearch(&state, PatternObject_GetCode(self));
2143#endif
2144 }
2145
2146 if (status <= 0) {
2147 if (status == 0)
2148 break;
2149 pattern_error(status);
2150 goto error;
2151 }
2152
2153 b = STATE_OFFSET(&state, state.start);
2154 e = STATE_OFFSET(&state, state.ptr);
2155
2156 if (i < b) {
2157 /* get segment before this match */
2158 item = PySequence_GetSlice(string, i, b);
2159 if (!item)
2160 goto error;
2161 status = PyList_Append(list, item);
2162 Py_DECREF(item);
2163 if (status < 0)
2164 goto error;
2165
2166 } else if (i == b && i == e && n > 0)
2167 /* ignore empty match on latest position */
2168 goto next;
2169
2170 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002171 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002172 match = pattern_new_match(self, &state, 1);
2173 if (!match)
2174 goto error;
2175 args = Py_BuildValue("(O)", match);
2176 if (!args) {
2177 Py_DECREF(args);
2178 goto error;
2179 }
2180 item = PyObject_CallObject(filter, args);
2181 Py_DECREF(args);
2182 Py_DECREF(match);
2183 if (!item)
2184 goto error;
2185 } else {
2186 /* filter is literal string */
2187 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002188 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002189 }
2190
2191 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002192 if (item != Py_None) {
2193 status = PyList_Append(list, item);
2194 Py_DECREF(item);
2195 if (status < 0)
2196 goto error;
2197 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002198
2199 i = e;
2200 n = n + 1;
2201
2202next:
2203 /* move on */
2204 if (state.ptr == state.start)
2205 state.start = (void*) ((char*) state.ptr + state.charsize);
2206 else
2207 state.start = state.ptr;
2208
2209 }
2210
2211 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002212 if (i < state.endpos) {
2213 item = PySequence_GetSlice(string, i, state.endpos);
2214 if (!item)
2215 goto error;
2216 status = PyList_Append(list, item);
2217 Py_DECREF(item);
2218 if (status < 0)
2219 goto error;
2220 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002221
2222 state_fini(&state);
2223
Fredrik Lundhdac58492001-10-21 21:48:30 +00002224 /* convert list to single string (also removes list) */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002225 item = join(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002226
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002227 if (!item)
2228 return NULL;
2229
2230 if (subn)
2231 return Py_BuildValue("Ni", item, n);
2232
2233 return item;
2234
2235error:
2236 Py_DECREF(list);
2237 state_fini(&state);
2238 return NULL;
2239
2240}
2241
2242static PyObject*
2243pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2244{
2245 PyObject* template;
2246 PyObject* string;
2247 int count = 0;
2248 static char* kwlist[] = { "repl", "string", "count", NULL };
2249 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2250 &template, &string, &count))
2251 return NULL;
2252
2253 return pattern_subx(self, template, string, count, 0);
2254}
2255
2256static PyObject*
2257pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2258{
2259 PyObject* template;
2260 PyObject* string;
2261 int count = 0;
2262 static char* kwlist[] = { "repl", "string", "count", NULL };
2263 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2264 &template, &string, &count))
2265 return NULL;
2266
2267 return pattern_subx(self, template, string, count, 1);
2268}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002269
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002270static PyObject*
2271pattern_copy(PatternObject* self, PyObject* args)
2272{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002273#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002274 PatternObject* copy;
2275 int offset;
2276
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002277 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2278 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002279
2280 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2281 if (!copy)
2282 return NULL;
2283
2284 offset = offsetof(PatternObject, groups);
2285
2286 Py_XINCREF(self->groupindex);
2287 Py_XINCREF(self->indexgroup);
2288 Py_XINCREF(self->pattern);
2289
2290 memcpy((char*) copy + offset, (char*) self + offset,
2291 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2292
2293 return (PyObject*) copy;
2294#else
2295 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2296 return NULL;
2297#endif
2298}
2299
2300static PyObject*
2301pattern_deepcopy(PatternObject* self, PyObject* args)
2302{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002303#ifdef USE_BUILTIN_COPY
2304 PatternObject* copy;
2305
2306 PyObject* memo;
2307 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2308 return NULL;
2309
2310 copy = (PatternObject*) pattern_copy(self, Py_None);
2311 if (!copy)
2312 return NULL;
2313
2314 if (!deepcopy(&copy->groupindex, memo) ||
2315 !deepcopy(&copy->indexgroup, memo) ||
2316 !deepcopy(&copy->pattern, memo)) {
2317 Py_DECREF(copy);
2318 return NULL;
2319 }
2320
2321#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002322 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2323 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002324#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002325}
2326
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002327static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002328 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2329 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2330 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2331 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2332 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2333 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh562586e2000-10-03 20:43:34 +00002334 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002335 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2336 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002337 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002338};
2339
2340static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002341pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002342{
2343 PyObject* res;
2344
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002345 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002346
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002347 if (res)
2348 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002349
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002350 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002351
2352 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002353 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002354 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002355 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002356 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002357
2358 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002359 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002360
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002361 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002362 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002363
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002364 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002365 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002366 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002367 }
2368
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002369 PyErr_SetString(PyExc_AttributeError, name);
2370 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002371}
2372
2373statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002374 PyObject_HEAD_INIT(NULL)
2375 0, "SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002376 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002377 (destructor)pattern_dealloc, /*tp_dealloc*/
2378 0, /*tp_print*/
2379 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002380};
2381
2382/* -------------------------------------------------------------------- */
2383/* match methods */
2384
2385static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002386match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002387{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002388 Py_XDECREF(self->regs);
2389 Py_XDECREF(self->string);
2390 Py_DECREF(self->pattern);
2391 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002392}
2393
2394static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002395match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002396{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002397 if (index < 0 || index >= self->groups) {
2398 /* raise IndexError if we were given a bad group number */
2399 PyErr_SetString(
2400 PyExc_IndexError,
2401 "no such group"
2402 );
2403 return NULL;
2404 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002405
Fredrik Lundh6f013982000-07-03 18:44:21 +00002406 index *= 2;
2407
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002408 if (self->string == Py_None || self->mark[index] < 0) {
2409 /* return default value if the string or group is undefined */
2410 Py_INCREF(def);
2411 return def;
2412 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002413
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002414 return PySequence_GetSlice(
2415 self->string, self->mark[index], self->mark[index+1]
2416 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002417}
2418
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002419static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002420match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002421{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002422 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002423
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002424 if (PyInt_Check(index))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002425 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002426
Fredrik Lundh6f013982000-07-03 18:44:21 +00002427 i = -1;
2428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002429 if (self->pattern->groupindex) {
2430 index = PyObject_GetItem(self->pattern->groupindex, index);
2431 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002432 if (PyInt_Check(index))
2433 i = (int) PyInt_AS_LONG(index);
2434 Py_DECREF(index);
2435 } else
2436 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002437 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002438
2439 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002440}
2441
2442static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002443match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002444{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002445 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002446}
2447
2448static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002449match_expand(MatchObject* self, PyObject* args)
2450{
2451 PyObject* template;
2452 if (!PyArg_ParseTuple(args, "O:expand", &template))
2453 return NULL;
2454
2455 /* delegate to Python code */
2456 return call(
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002457 SRE_MODULE, "_expand",
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002458 Py_BuildValue("OOO", self->pattern, self, template)
2459 );
2460}
2461
2462static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002463match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002464{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002465 PyObject* result;
2466 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002467
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002468 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002469
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002470 switch (size) {
2471 case 0:
2472 result = match_getslice(self, Py_False, Py_None);
2473 break;
2474 case 1:
2475 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2476 break;
2477 default:
2478 /* fetch multiple items */
2479 result = PyTuple_New(size);
2480 if (!result)
2481 return NULL;
2482 for (i = 0; i < size; i++) {
2483 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002484 self, PyTuple_GET_ITEM(args, i), Py_None
2485 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002486 if (!item) {
2487 Py_DECREF(result);
2488 return NULL;
2489 }
2490 PyTuple_SET_ITEM(result, i, item);
2491 }
2492 break;
2493 }
2494 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002495}
2496
2497static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002498match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002499{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002500 PyObject* result;
2501 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002502
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002503 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002504 static char* kwlist[] = { "default", NULL };
2505 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002506 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002507
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002508 result = PyTuple_New(self->groups-1);
2509 if (!result)
2510 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002511
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002512 for (index = 1; index < self->groups; index++) {
2513 PyObject* item;
2514 item = match_getslice_by_index(self, index, def);
2515 if (!item) {
2516 Py_DECREF(result);
2517 return NULL;
2518 }
2519 PyTuple_SET_ITEM(result, index-1, item);
2520 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002521
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002522 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002523}
2524
2525static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002526match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002527{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002528 PyObject* result;
2529 PyObject* keys;
2530 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002531
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002532 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002533 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002534 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002535 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002536
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002537 result = PyDict_New();
2538 if (!result || !self->pattern->groupindex)
2539 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002540
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002541 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002542 if (!keys)
2543 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002544
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002545 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002546 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002547 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002548 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002549 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002550 if (!key)
2551 goto failed;
2552 value = match_getslice(self, key, def);
2553 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002554 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002555 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002556 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002557 status = PyDict_SetItem(result, key, value);
2558 Py_DECREF(value);
2559 if (status < 0)
2560 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002561 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002562
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002563 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002564
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002565 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002566
2567failed:
2568 Py_DECREF(keys);
2569 Py_DECREF(result);
2570 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002571}
2572
2573static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002574match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002575{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002576 int index;
2577
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002578 PyObject* index_ = Py_False; /* zero */
2579 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2580 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002581
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002582 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002583
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002584 if (index < 0 || index >= self->groups) {
2585 PyErr_SetString(
2586 PyExc_IndexError,
2587 "no such group"
2588 );
2589 return NULL;
2590 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002591
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002592 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002593 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002594}
2595
2596static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002597match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002598{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002599 int index;
2600
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002601 PyObject* index_ = Py_False; /* zero */
2602 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2603 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002604
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002605 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002606
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002607 if (index < 0 || index >= self->groups) {
2608 PyErr_SetString(
2609 PyExc_IndexError,
2610 "no such group"
2611 );
2612 return NULL;
2613 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002614
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002615 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002616 return Py_BuildValue("i", self->mark[index*2+1]);
2617}
2618
2619LOCAL(PyObject*)
2620_pair(int i1, int i2)
2621{
2622 PyObject* pair;
2623 PyObject* item;
2624
2625 pair = PyTuple_New(2);
2626 if (!pair)
2627 return NULL;
2628
2629 item = PyInt_FromLong(i1);
2630 if (!item)
2631 goto error;
2632 PyTuple_SET_ITEM(pair, 0, item);
2633
2634 item = PyInt_FromLong(i2);
2635 if (!item)
2636 goto error;
2637 PyTuple_SET_ITEM(pair, 1, item);
2638
2639 return pair;
2640
2641 error:
2642 Py_DECREF(pair);
2643 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002644}
2645
2646static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002647match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002648{
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002649 int index;
2650
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002651 PyObject* index_ = Py_False; /* zero */
2652 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2653 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002654
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002655 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002656
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002657 if (index < 0 || index >= self->groups) {
2658 PyErr_SetString(
2659 PyExc_IndexError,
2660 "no such group"
2661 );
2662 return NULL;
2663 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002664
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002665 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002666 return _pair(self->mark[index*2], self->mark[index*2+1]);
2667}
2668
2669static PyObject*
2670match_regs(MatchObject* self)
2671{
2672 PyObject* regs;
2673 PyObject* item;
2674 int index;
2675
2676 regs = PyTuple_New(self->groups);
2677 if (!regs)
2678 return NULL;
2679
2680 for (index = 0; index < self->groups; index++) {
2681 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2682 if (!item) {
2683 Py_DECREF(regs);
2684 return NULL;
2685 }
2686 PyTuple_SET_ITEM(regs, index, item);
2687 }
2688
2689 Py_INCREF(regs);
2690 self->regs = regs;
2691
2692 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002693}
2694
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002695static PyObject*
2696match_copy(MatchObject* self, PyObject* args)
2697{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002698#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002699 MatchObject* copy;
2700 int slots, offset;
2701
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002702 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2703 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002704
2705 slots = 2 * (self->pattern->groups+1);
2706
2707 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2708 if (!copy)
2709 return NULL;
2710
2711 /* this value a constant, but any compiler should be able to
2712 figure that out all by itself */
2713 offset = offsetof(MatchObject, string);
2714
2715 Py_XINCREF(self->pattern);
2716 Py_XINCREF(self->string);
2717 Py_XINCREF(self->regs);
2718
2719 memcpy((char*) copy + offset, (char*) self + offset,
2720 sizeof(MatchObject) + slots * sizeof(int) - offset);
2721
2722 return (PyObject*) copy;
2723#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002724 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002725 return NULL;
2726#endif
2727}
2728
2729static PyObject*
2730match_deepcopy(MatchObject* self, PyObject* args)
2731{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002732#ifdef USE_BUILTIN_COPY
2733 MatchObject* copy;
2734
2735 PyObject* memo;
2736 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2737 return NULL;
2738
2739 copy = (MatchObject*) match_copy(self, Py_None);
2740 if (!copy)
2741 return NULL;
2742
2743 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2744 !deepcopy(&copy->string, memo) ||
2745 !deepcopy(&copy->regs, memo)) {
2746 Py_DECREF(copy);
2747 return NULL;
2748 }
2749
2750#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002751 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2752 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002753#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002754}
2755
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002756static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002757 {"group", (PyCFunction) match_group, METH_VARARGS},
2758 {"start", (PyCFunction) match_start, METH_VARARGS},
2759 {"end", (PyCFunction) match_end, METH_VARARGS},
2760 {"span", (PyCFunction) match_span, METH_VARARGS},
2761 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2762 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2763 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002764 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2765 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002766 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002767};
2768
2769static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002770match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002771{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002772 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002773
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002774 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2775 if (res)
2776 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002777
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002778 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002779
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002780 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002781 if (self->lastindex >= 0)
2782 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002783 Py_INCREF(Py_None);
2784 return Py_None;
2785 }
2786
2787 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002788 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002789 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002790 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002791 );
2792 if (result)
2793 return result;
2794 PyErr_Clear();
2795 }
2796 Py_INCREF(Py_None);
2797 return Py_None;
2798 }
2799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002800 if (!strcmp(name, "string")) {
2801 if (self->string) {
2802 Py_INCREF(self->string);
2803 return self->string;
2804 } else {
2805 Py_INCREF(Py_None);
2806 return Py_None;
2807 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002808 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002809
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002810 if (!strcmp(name, "regs")) {
2811 if (self->regs) {
2812 Py_INCREF(self->regs);
2813 return self->regs;
2814 } else
2815 return match_regs(self);
2816 }
2817
2818 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002819 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002820 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002821 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002822
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002823 if (!strcmp(name, "pos"))
2824 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002825
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002826 if (!strcmp(name, "endpos"))
2827 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002828
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002829 PyErr_SetString(PyExc_AttributeError, name);
2830 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002831}
2832
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2835
2836statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002837 PyObject_HEAD_INIT(NULL)
2838 0, "SRE_Match",
2839 sizeof(MatchObject), sizeof(int),
2840 (destructor)match_dealloc, /*tp_dealloc*/
2841 0, /*tp_print*/
2842 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002843};
2844
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002845/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002846/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002847
2848static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002849scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002850{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002851 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002852 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002853 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002854}
2855
2856static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002857scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002858{
2859 SRE_STATE* state = &self->state;
2860 PyObject* match;
2861 int status;
2862
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002863 state_reset(state);
2864
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002865 state->ptr = state->start;
2866
2867 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002868 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002869 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002870#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002871 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002872#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002873 }
2874
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002875 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002876 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002877
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002878 if (status == 0 || state->ptr == state->start)
2879 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002880 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002881 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002882
2883 return match;
2884}
2885
2886
2887static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002888scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002889{
2890 SRE_STATE* state = &self->state;
2891 PyObject* match;
2892 int status;
2893
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002894 state_reset(state);
2895
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002896 state->ptr = state->start;
2897
2898 if (state->charsize == 1) {
2899 status = sre_search(state, PatternObject_GetCode(self->pattern));
2900 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002901#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002902 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002903#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002904 }
2905
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002906 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002907 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002908
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002909 if (status == 0 || state->ptr == state->start)
2910 state->start = (void*) ((char*) state->ptr + state->charsize);
2911 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002912 state->start = state->ptr;
2913
2914 return match;
2915}
2916
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002917static PyMethodDef scanner_methods[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002918 {"match", (PyCFunction) scanner_match, 0},
2919 {"search", (PyCFunction) scanner_search, 0},
2920 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002921};
2922
2923static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002924scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002925{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002926 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002927
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002928 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2929 if (res)
2930 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002931
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002932 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002934 /* attributes */
2935 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002936 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002937 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002938 }
2939
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002940 PyErr_SetString(PyExc_AttributeError, name);
2941 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002942}
2943
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002944statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002945 PyObject_HEAD_INIT(NULL)
2946 0, "SRE_Scanner",
2947 sizeof(ScannerObject), 0,
2948 (destructor)scanner_dealloc, /*tp_dealloc*/
2949 0, /*tp_print*/
2950 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002951};
2952
Guido van Rossumb700df92000-03-31 14:59:30 +00002953static PyMethodDef _functions[] = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002954 {"compile", _compile, 1},
2955 {"getcodesize", sre_codesize, 1},
2956 {"getlower", sre_getlower, 1},
2957 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002958};
2959
Tim Peters5687ffe2001-02-28 16:44:18 +00002960DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +00002961init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002962{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002963 PyObject* m;
2964 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002965 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002966
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002967 /* Patch object types */
2968 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002969 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00002970
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00002971 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002972 d = PyModule_GetDict(m);
2973
Fredrik Lundh21009b92001-09-18 18:47:09 +00002974 x = PyInt_FromLong(SRE_MAGIC);
2975 if (x) {
2976 PyDict_SetItemString(d, "MAGIC", x);
2977 Py_DECREF(x);
2978 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002979
Fredrik Lundh21009b92001-09-18 18:47:09 +00002980 x = PyString_FromString(copyright);
2981 if (x) {
2982 PyDict_SetItemString(d, "copyright", x);
2983 Py_DECREF(x);
2984 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002985}
2986
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002987#endif /* !defined(SRE_RECURSIVE) */