blob: be6920df6875f9a526d128ecbe57124e55845a29 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of
 * 2000-08-01 fl  fixes for 1.6b1
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-09-20 fl  added expand method
 * 2001-03-20 fl  lots of fixes for 2.1b2
 * 2001-04-15 fl  export copyright as Python attribute, not global
 * 2001-04-28 fl  added __copy__ methods (work in progress)
 * 2001-05-14 fl  fixes for 1.5.2 compatibility
 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl  added sub/subn primitive
 * 2001-10-24 fl  added finditer primitive (for 2.2 only)
 * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl  fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
35
36#ifndef SRE_RECURSIVE
37
/* version and copyright banner for this copy of the engine */
static char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
41#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000042#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000043
44#include "sre.h"
45
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000046#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000047
/* module name without its leading underscore; may be predefined by
   the build, otherwise it defaults to "sre" */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* tracing is switched off here; define VERBOSE instead to enable it */
#undef VERBOSE

/* unicode support is compiled in for 1.6 and later; from 2.2 onwards
   it additionally requires Py_USING_UNICODE to be defined */
#if PY_VERSION_HEX >= 0x01060000 && \
    (PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE))
#define HAVE_UNICODE
#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000062
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000064/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000065
/* guard against run-away recursion (bad patterns on long strings).
   a fixed depth limit is only used when USE_STACKCHECK is not set */

#ifndef USE_STACKCHECK
#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
/* a number of 64-bit platforms need a smaller limit:
   Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
#define USE_RECURSION_LIMIT 7500
#else
#define USE_RECURSION_LIMIT 10000
#endif
#endif

/* fast searching: enabled */
#define USE_FAST_SEARCH

/* aggressive inlining: disabled (always on for Visual C) */
#undef USE_INLINE

/* copy/deepcopy handling (work in progress): disabled */
#undef USE_BUILTIN_COPY

/* interpreters older than 1.6 get PyObject_DEL mapped onto PyMem_DEL */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif
91
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000092/* -------------------------------------------------------------------- */
93
/* LOCAL(type) introduces a file-local function returning "type" */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */

/* TRACE((fmt, ...)) prints through printf when VERBOSE is defined and
   expands to nothing otherwise; note the double parentheses */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
116
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000117/* -------------------------------------------------------------------- */
118/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000119
/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored per character in sre_char_info */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII. create tables in init_sre() instead */

/* class flags for character codes 0..127, sixteen codes per row */
static char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* lower-case mapping for character codes 0..127: codes 65..90 map to
   97..122, every other code maps to itself */
static char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};

/* table-driven predicates: each yields the matching mask bit (nonzero)
   or 0; codes of 128 and above never match.  the argument is evaluated
   more than once */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000158
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000159static unsigned int sre_lower(unsigned int ch)
160{
161 return ((ch) < 128 ? sre_char_lower[ch] : ch);
162}
163
/* locale-specific character predicates.  codes of 256 and above never
   match; smaller codes are classified by the <ctype.h> functions.  the
   argument is evaluated more than once */

#define SRE_LOC_IS_DIGIT(ch) ((ch) >= 256 ? 0 : isdigit((ch)))
#define SRE_LOC_IS_SPACE(ch) ((ch) >= 256 ? 0 : isspace((ch)))
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) >= 256 ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) ((ch) == '_' || SRE_LOC_IS_ALNUM((ch)))
171
/* lower-case a character with tolower(); codes of 256 and above are
   returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch < 256)
        return tolower((ch));
    return ch;
}
176
/* unicode-specific character predicates (only with HAVE_UNICODE).
   every argument is cast to Py_UNICODE before being classified */

#ifdef HAVE_UNICODE

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
/* a word character is an alphanumeric or an underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* lower-case a character via Py_UNICODE_TOLOWER */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
}

#endif
193
Guido van Rossumb700df92000-03-31 14:59:30 +0000194LOCAL(int)
195sre_category(SRE_CODE category, unsigned int ch)
196{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000197 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000198
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000199 case SRE_CATEGORY_DIGIT:
200 return SRE_IS_DIGIT(ch);
201 case SRE_CATEGORY_NOT_DIGIT:
202 return !SRE_IS_DIGIT(ch);
203 case SRE_CATEGORY_SPACE:
204 return SRE_IS_SPACE(ch);
205 case SRE_CATEGORY_NOT_SPACE:
206 return !SRE_IS_SPACE(ch);
207 case SRE_CATEGORY_WORD:
208 return SRE_IS_WORD(ch);
209 case SRE_CATEGORY_NOT_WORD:
210 return !SRE_IS_WORD(ch);
211 case SRE_CATEGORY_LINEBREAK:
212 return SRE_IS_LINEBREAK(ch);
213 case SRE_CATEGORY_NOT_LINEBREAK:
214 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000215
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000216 case SRE_CATEGORY_LOC_WORD:
217 return SRE_LOC_IS_WORD(ch);
218 case SRE_CATEGORY_LOC_NOT_WORD:
219 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000220
221#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000222 case SRE_CATEGORY_UNI_DIGIT:
223 return SRE_UNI_IS_DIGIT(ch);
224 case SRE_CATEGORY_UNI_NOT_DIGIT:
225 return !SRE_UNI_IS_DIGIT(ch);
226 case SRE_CATEGORY_UNI_SPACE:
227 return SRE_UNI_IS_SPACE(ch);
228 case SRE_CATEGORY_UNI_NOT_SPACE:
229 return !SRE_UNI_IS_SPACE(ch);
230 case SRE_CATEGORY_UNI_WORD:
231 return SRE_UNI_IS_WORD(ch);
232 case SRE_CATEGORY_UNI_NOT_WORD:
233 return !SRE_UNI_IS_WORD(ch);
234 case SRE_CATEGORY_UNI_LINEBREAK:
235 return SRE_UNI_IS_LINEBREAK(ch);
236 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
237 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000238#else
239 case SRE_CATEGORY_UNI_DIGIT:
240 return SRE_IS_DIGIT(ch);
241 case SRE_CATEGORY_UNI_NOT_DIGIT:
242 return !SRE_IS_DIGIT(ch);
243 case SRE_CATEGORY_UNI_SPACE:
244 return SRE_IS_SPACE(ch);
245 case SRE_CATEGORY_UNI_NOT_SPACE:
246 return !SRE_IS_SPACE(ch);
247 case SRE_CATEGORY_UNI_WORD:
248 return SRE_LOC_IS_WORD(ch);
249 case SRE_CATEGORY_UNI_NOT_WORD:
250 return !SRE_LOC_IS_WORD(ch);
251 case SRE_CATEGORY_UNI_LINEBREAK:
252 return SRE_IS_LINEBREAK(ch);
253 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
254 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000255#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000256 }
257 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000258}
259
260/* helpers */
261
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000262static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000263mark_fini(SRE_STATE* state)
264{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000265 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000266 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000267 state->mark_stack = NULL;
268 }
269 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000270}
271
272static int
273mark_save(SRE_STATE* state, int lo, int hi)
274{
275 void* stack;
276 int size;
277 int minsize, newsize;
278
279 if (hi <= lo)
280 return 0;
281
282 size = (hi - lo) + 1;
283
284 newsize = state->mark_stack_size;
285 minsize = state->mark_stack_base + size;
286
287 if (newsize < minsize) {
288 /* create new stack */
289 if (!newsize) {
290 newsize = 512;
291 if (newsize < minsize)
292 newsize = minsize;
293 TRACE(("allocate stack %d\n", newsize));
294 stack = malloc(sizeof(void*) * newsize);
295 } else {
296 /* grow the stack */
297 while (newsize < minsize)
298 newsize += newsize;
299 TRACE(("grow stack to %d\n", newsize));
300 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
301 }
302 if (!stack) {
303 mark_fini(state);
304 return SRE_ERROR_MEMORY;
305 }
306 state->mark_stack = stack;
307 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000308 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000310 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311
312 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
313 size * sizeof(void*));
314
315 state->mark_stack_base += size;
316
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000317 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000318}
319
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000320static int
321mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000322{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000323 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000324
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000325 if (hi <= lo)
326 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000327
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000328 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000329
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000330 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000331
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000332 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000333
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000334 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
335 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000336
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000337 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000338}
339
/* generate 8-bit version: bind the generic names used by the matching
   engine section to their 8-bit implementations */

#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_CHARSET sre_charset
#define SRE_COUNT sre_count
#define SRE_INFO sre_info
#define SRE_LITERAL_TEMPLATE sre_literal_template
#define SRE_MATCH sre_match
#define SRE_SEARCH sre_search

#if defined(HAVE_UNICODE)

/* include this file into itself; with SRE_RECURSIVE defined the
   section guarded by "#ifndef SRE_RECURSIVE" is left out, so the
   include only produces the 8-bit functions */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

#undef SRE_CHAR
#undef SRE_AT
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_INFO
#undef SRE_LITERAL_TEMPLATE
#undef SRE_MATCH
#undef SRE_SEARCH

/* generate 16-bit unicode version: rebind the names so that the code
   following this section is compiled for Py_UNICODE */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_CHARSET sre_ucharset
#define SRE_COUNT sre_ucount
#define SRE_INFO sre_uinfo
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000377
378#endif /* SRE_RECURSIVE */
379
380/* -------------------------------------------------------------------- */
381/* String matching engine */
382
383/* the following section is compiled twice, with different character
384 settings */
385
386LOCAL(int)
387SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
388{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000389 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000390
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000391 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000396 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 case SRE_AT_BEGINNING_LINE:
400 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000401 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000402
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000403 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000404 return (((void*) (ptr+1) == state->end &&
405 SRE_IS_LINEBREAK((int) ptr[0])) ||
406 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000407
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000408 case SRE_AT_END_LINE:
409 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000410 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000411
Fredrik Lundh770617b2001-01-14 15:06:11 +0000412 case SRE_AT_END_STRING:
413 return ((void*) ptr == state->end);
414
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000415 case SRE_AT_BOUNDARY:
416 if (state->beginning == state->end)
417 return 0;
418 that = ((void*) ptr > state->beginning) ?
419 SRE_IS_WORD((int) ptr[-1]) : 0;
420 this = ((void*) ptr < state->end) ?
421 SRE_IS_WORD((int) ptr[0]) : 0;
422 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000423
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000424 case SRE_AT_NON_BOUNDARY:
425 if (state->beginning == state->end)
426 return 0;
427 that = ((void*) ptr > state->beginning) ?
428 SRE_IS_WORD((int) ptr[-1]) : 0;
429 this = ((void*) ptr < state->end) ?
430 SRE_IS_WORD((int) ptr[0]) : 0;
431 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000432
433 case SRE_AT_LOC_BOUNDARY:
434 if (state->beginning == state->end)
435 return 0;
436 that = ((void*) ptr > state->beginning) ?
437 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
438 this = ((void*) ptr < state->end) ?
439 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
440 return this != that;
441
442 case SRE_AT_LOC_NON_BOUNDARY:
443 if (state->beginning == state->end)
444 return 0;
445 that = ((void*) ptr > state->beginning) ?
446 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
447 this = ((void*) ptr < state->end) ?
448 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
449 return this == that;
450
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000451#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000452 case SRE_AT_UNI_BOUNDARY:
453 if (state->beginning == state->end)
454 return 0;
455 that = ((void*) ptr > state->beginning) ?
456 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
457 this = ((void*) ptr < state->end) ?
458 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
459 return this != that;
460
461 case SRE_AT_UNI_NON_BOUNDARY:
462 if (state->beginning == state->end)
463 return 0;
464 that = ((void*) ptr > state->beginning) ?
465 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
466 this = ((void*) ptr < state->end) ?
467 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
468 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000469#endif
470
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000471 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000473 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000474}
475
476LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000477SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000478{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000479 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000480
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000481 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000482
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000483 for (;;) {
484 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000485
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000486 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000487 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000488 if (ch == set[0])
489 return ok;
490 set++;
491 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000492
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000493 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000494 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000495 if (set[0] <= ch && ch <= set[1])
496 return ok;
497 set += 2;
498 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000499
Fredrik Lundh3562f112000-07-02 12:00:07 +0000500 case SRE_OP_CHARSET:
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000501 if (sizeof(SRE_CODE) == 2) {
502 /* <CHARSET> <bitmap> (16 bits per code word) */
503 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
504 return ok;
505 set += 16;
506 }
507 else {
508 /* <CHARSET> <bitmap> (32 bits per code word) */
509 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
510 return ok;
511 set += 8;
512 }
Fredrik Lundh3562f112000-07-02 12:00:07 +0000513 break;
514
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000515 case SRE_OP_BIGCHARSET:
516 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
517 {
518 int count, block;
519 count = *(set++);
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000520
521 if (sizeof(SRE_CODE) == 2) {
522 block = ((unsigned char*)set)[ch >> 8];
523 set += 128;
524 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
525 return ok;
526 set += count*16;
527 }
528 else {
529 if (ch < 65536)
530 block = ((unsigned char*)set)[ch >> 8];
531 else
532 block = -1;
533 set += 64;
534 if (block >=0 &&
535 (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
536 return ok;
537 set += count*8;
538 }
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000539 break;
540 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000541
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000542 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000543 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000544 if (sre_category(set[0], (int) ch))
545 return ok;
546 set += 1;
547 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000548
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000549 case SRE_OP_NEGATE:
550 ok = !ok;
551 break;
552
553 case SRE_OP_FAILURE:
554 return !ok;
555
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000556 default:
557 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000558 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000559 return 0;
560 }
561 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000562}
563
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000564LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
565
566LOCAL(int)
567SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
568{
569 SRE_CODE chr;
570 SRE_CHAR* ptr = state->ptr;
571 SRE_CHAR* end = state->end;
572 int i;
573
574 /* adjust end */
575 if (maxcount < end - ptr && maxcount != 65535)
576 end = ptr + maxcount;
577
578 switch (pattern[0]) {
579
580 case SRE_OP_ANY:
581 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000582 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000583 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
584 ptr++;
585 break;
586
587 case SRE_OP_ANY_ALL:
588 /* repeated dot wildcare. skip to the end of the target
589 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000590 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000591 ptr = end;
592 break;
593
594 case SRE_OP_LITERAL:
595 /* repeated literal */
596 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000597 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000598 while (ptr < end && (SRE_CODE) *ptr == chr)
599 ptr++;
600 break;
601
602 case SRE_OP_LITERAL_IGNORE:
603 /* repeated literal */
604 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000605 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000606 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
607 ptr++;
608 break;
609
610 case SRE_OP_NOT_LITERAL:
611 /* repeated non-literal */
612 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000613 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000614 while (ptr < end && (SRE_CODE) *ptr != chr)
615 ptr++;
616 break;
617
618 case SRE_OP_NOT_LITERAL_IGNORE:
619 /* repeated non-literal */
620 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000621 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000622 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
623 ptr++;
624 break;
625
626 case SRE_OP_IN:
627 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000628 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
629 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000630 ptr++;
631 break;
632
633 default:
634 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000635 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000636 while ((SRE_CHAR*) state->ptr < end) {
637 i = SRE_MATCH(state, pattern, level);
638 if (i < 0)
639 return i;
640 if (!i)
641 break;
642 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000643 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
644 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000645 return (SRE_CHAR*) state->ptr - ptr;
646 }
647
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000648 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000649 return ptr - (SRE_CHAR*) state->ptr;
650}
651
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    SRE_CHAR* ptr = state->ptr;
    SRE_CHAR* end = state->end;
    int i;

    /* too little input left for the minimal length? */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* without a known prefix of more than one character there is
       nothing further to check */
    if (!(pattern[2] & SRE_INFO_PREFIX) || pattern[5] <= 1)
        return pattern[0];

    /* check known prefix */
    /* <length> <skip> <prefix data> <overlap data> */
    for (i = 0; i < pattern[5]; i++) {
        if ((SRE_CODE) ptr[i] != pattern[7 + i])
            return 0;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000679
/* macros to preserve lastmark in case of backtracking.  both expect
   "state", "lastmark" and "lastindex" to be in scope where they are
   used.  SAVE copies the two fields of state into the locals; RESTORE
   undoes any marks added since then: it zeroes the mark slots above
   the saved lastmark and puts both fields back.  RESTORE does nothing
   (lastindex included) unless state->lastmark has grown */
#define LASTMARK_SAVE() \
    do { \
        lastindex = state->lastindex; \
        lastmark = state->lastmark; \
    } while (0)
#define LASTMARK_RESTORE() \
    do { \
        if (lastmark < state->lastmark) { \
            memset(&state->mark[lastmark + 1], 0, \
                   sizeof(void*) * (state->lastmark - lastmark)); \
            state->lastindex = lastindex; \
            state->lastmark = lastmark; \
        } \
    } while (0)
695
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000696LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000697SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000698{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000699 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000700 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000701
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000702 SRE_CHAR* end = state->end;
703 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000704 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000705 SRE_REPEAT* rp;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000706 int lastmark, lastindex;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000707 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000708
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000709 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000711 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000712
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000713#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000714 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000715 return SRE_ERROR_RECURSION_LIMIT;
716#endif
717
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000718#if defined(USE_RECURSION_LIMIT)
719 if (level > USE_RECURSION_LIMIT)
720 return SRE_ERROR_RECURSION_LIMIT;
721#endif
722
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000723 if (pattern[0] == SRE_OP_INFO) {
724 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000725 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000726 if (pattern[3] && (end - ptr) < pattern[3]) {
727 TRACE(("reject (got %d chars, need %d)\n",
728 (end - ptr), pattern[3]));
729 return 0;
730 }
731 pattern += pattern[1] + 1;
732 }
733
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000734 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000735
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000737
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 case SRE_OP_FAILURE:
739 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000740 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000741 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000742
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 case SRE_OP_SUCCESS:
744 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000745 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000747 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000749 case SRE_OP_AT:
750 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000751 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000752 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000754 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 pattern++;
756 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000757
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000758 case SRE_OP_CATEGORY:
759 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000760 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000761 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000763 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000764 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000765 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000766 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000767
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000768 case SRE_OP_LITERAL:
769 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000770 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000771 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000772 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000773 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000774 pattern++;
775 ptr++;
776 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000777
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000778 case SRE_OP_NOT_LITERAL:
779 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000780 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000781 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000782 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000783 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000784 pattern++;
785 ptr++;
786 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000787
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000788 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000789 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000790 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000791 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000792 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
793 return 0;
794 ptr++;
795 break;
796
797 case SRE_OP_ANY_ALL:
798 /* match anything */
799 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000800 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000801 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000802 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000803 ptr++;
804 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000805
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000806 case SRE_OP_IN:
807 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000808 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000809 TRACE(("|%p|%p|IN\n", pattern, ptr));
810 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000811 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000812 pattern += pattern[0];
813 ptr++;
814 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000815
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000816 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000818 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000819 i = pattern[0];
820 {
821 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
822 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
823 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000824 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000825 while (p < e) {
826 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000827 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000828 p++; ptr++;
829 }
830 }
831 pattern++;
832 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000833
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000834 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000835 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000836 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000837 i = pattern[0];
838 {
839 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
840 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
841 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000842 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000843 while (p < e) {
844 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000845 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000846 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000847 p++; ptr++;
848 }
849 }
850 pattern++;
851 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000852
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000853 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000854 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000855 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000856 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000857 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000858 pattern++;
859 ptr++;
860 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000862 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000863 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000864 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000865 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000866 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000867 pattern++;
868 ptr++;
869 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000870
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000871 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000872 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000873 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000874 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000875 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000876 pattern += pattern[0];
877 ptr++;
878 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000879
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000880 case SRE_OP_MARK:
881 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000882 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000883 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000884 i = pattern[0];
Gustavo Niemeyer1aca3592003-04-20 00:45:13 +0000885 if (i & 1)
886 state->lastindex = i/2 + 1;
887 if (i > state->lastmark)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000888 state->lastmark = i;
889 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000890 pattern++;
891 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000892
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000893 case SRE_OP_JUMP:
894 case SRE_OP_INFO:
895 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000896 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000897 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000898 pattern += pattern[0];
899 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000900
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000901 case SRE_OP_ASSERT:
902 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000903 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000904 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000905 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000906 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000907 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000908 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000909 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000910 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000911 pattern += pattern[0];
912 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000913
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000914 case SRE_OP_ASSERT_NOT:
915 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000916 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000917 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000918 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000919 if (state->ptr >= state->beginning) {
920 i = SRE_MATCH(state, pattern + 2, level + 1);
921 if (i < 0)
922 return i;
923 if (i)
924 return 0;
925 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000926 pattern += pattern[0];
927 break;
928
929 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000930 /* alternation */
931 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000932 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000933 LASTMARK_SAVE();
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000934 for (; pattern[0]; pattern += pattern[0]) {
935 if (pattern[1] == SRE_OP_LITERAL &&
936 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
937 continue;
938 if (pattern[1] == SRE_OP_IN &&
939 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
940 continue;
941 state->ptr = ptr;
942 i = SRE_MATCH(state, pattern + 1, level + 1);
943 if (i)
944 return i;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000945 LASTMARK_RESTORE();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000946 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000947 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000948
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000949 case SRE_OP_REPEAT_ONE:
950 /* match repeated sequence (maximizing regexp) */
951
952 /* this operator only works if the repeated item is
953 exactly one character wide, and we're not already
954 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000955 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000956
957 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
958
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000959 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000960 pattern[1], pattern[2]));
961
Fredrik Lundhe1869832000-08-01 22:47:49 +0000962 if (ptr + pattern[1] > end)
963 return 0; /* cannot match */
964
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000965 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000966
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000967 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
968 if (count < 0)
969 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000970
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000971 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000972
973 /* when we arrive here, count contains the number of
974 matches, and ptr points to the tail of the target
975 string. check if the rest of the pattern matches,
976 and backtrack if not. */
977
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000978 if (count < (int) pattern[1])
979 return 0;
980
981 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
982 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000983 state->ptr = ptr;
984 return 1;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000985 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000986
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000987 LASTMARK_SAVE();
988
989 if (pattern[pattern[0]] == SRE_OP_LITERAL) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000990 /* tail starts with a literal. skip positions where
991 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000992 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000993 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000994 while (count >= (int) pattern[1] &&
995 (ptr >= end || *ptr != chr)) {
996 ptr--;
997 count--;
998 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000999 if (count < (int) pattern[1])
1000 break;
1001 state->ptr = ptr;
1002 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001003 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +00001004 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001005 ptr--;
1006 count--;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001007 LASTMARK_RESTORE();
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001008 }
1009
1010 } else {
1011 /* general case */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001012 while (count >= (int) pattern[1]) {
1013 state->ptr = ptr;
1014 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001015 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +00001016 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001017 ptr--;
1018 count--;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001019 LASTMARK_RESTORE();
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001020 }
1021 }
1022 return 0;
1023
Guido van Rossum41c99e72003-04-14 17:59:34 +00001024 case SRE_OP_MIN_REPEAT_ONE:
1025 /* match repeated sequence (minimizing regexp) */
1026
1027 /* this operator only works if the repeated item is
1028 exactly one character wide, and we're not already
1029 collecting backtracking points. for other cases,
1030 use the MIN_REPEAT operator */
1031
1032 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1033
1034 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
1035 pattern[1], pattern[2]));
1036
1037 if (ptr + pattern[1] > end)
1038 return 0; /* cannot match */
1039
1040 state->ptr = ptr;
1041
1042 if (pattern[1] == 0)
1043 count = 0;
1044 else {
1045 /* count using pattern min as the maximum */
1046 count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1);
1047
1048 if (count < 0)
1049 return count; /* exception */
1050 if (count < (int) pattern[1])
1051 return 0; /* did not match minimum number of times */
1052 ptr += count; /* advance past minimum matches of repeat */
1053 }
1054
1055 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
1056 /* tail is empty. we're finished */
1057 state->ptr = ptr;
1058 return 1;
1059
1060 } else {
1061 /* general case */
1062 int matchmax = ((int)pattern[2] == 65535);
1063 int c;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001064 LASTMARK_SAVE();
Guido van Rossum41c99e72003-04-14 17:59:34 +00001065 while (matchmax || count <= (int) pattern[2]) {
1066 state->ptr = ptr;
1067 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
1068 if (i)
1069 return i;
1070 state->ptr = ptr;
1071 c = SRE_COUNT(state, pattern+3, 1, level+1);
1072 if (c < 0)
1073 return c;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001074 LASTMARK_RESTORE();
Guido van Rossum41c99e72003-04-14 17:59:34 +00001075 if (c == 0)
1076 break;
1077 assert(c == 1);
1078 ptr++;
1079 count++;
1080 }
Guido van Rossum41c99e72003-04-14 17:59:34 +00001081 }
1082 return 0;
1083
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001084 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001085 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001086 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001087 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001088 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001089 pattern[1], pattern[2]));
1090
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001091 rep.count = -1;
1092 rep.pattern = pattern;
1093
1094 /* install new repeat context */
1095 rep.prev = state->repeat;
1096 state->repeat = &rep;
1097
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001098 state->ptr = ptr;
1099 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001100
1101 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001102
1103 return i;
1104
1105 case SRE_OP_MAX_UNTIL:
1106 /* maximizing repeat */
1107 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1108
1109 /* FIXME: we probably need to deal with zero-width
1110 matches in here... */
1111
1112 rp = state->repeat;
1113 if (!rp)
1114 return SRE_ERROR_STATE;
1115
1116 state->ptr = ptr;
1117
1118 count = rp->count + 1;
1119
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001120 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001121
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001122 LASTMARK_SAVE();
1123
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001124 if (count < rp->pattern[1]) {
1125 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001126 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001127 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001128 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001129 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001130 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001131 rp->count = count - 1;
1132 state->ptr = ptr;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001133 LASTMARK_RESTORE();
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001134 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001135 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001136
1137 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001138 /* we may have enough matches, but if we can
1139 match another item, do so */
1140 rp->count = count;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001141 i = mark_save(state, 0, lastmark);
1142 if (i < 0)
1143 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001144 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001145 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001146 if (i)
1147 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001148 i = mark_restore(state, 0, lastmark);
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001149 LASTMARK_RESTORE();
Fredrik Lundh33accc12000-08-27 20:59:47 +00001150 if (i < 0)
1151 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001152 rp->count = count - 1;
1153 state->ptr = ptr;
1154 }
1155
1156 /* cannot match more repeated items here. make sure the
1157 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001158 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001159 i = SRE_MATCH(state, pattern, level + 1);
1160 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001161 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001162 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001163 state->ptr = ptr;
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001164 LASTMARK_RESTORE();
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001165 return 0;
1166
1167 case SRE_OP_MIN_UNTIL:
1168 /* minimizing repeat */
1169 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1170
1171 rp = state->repeat;
1172 if (!rp)
1173 return SRE_ERROR_STATE;
1174
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001175 state->ptr = ptr;
1176
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001177 count = rp->count + 1;
1178
Fredrik Lundh770617b2001-01-14 15:06:11 +00001179 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1180 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001181
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001182 LASTMARK_SAVE();
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001183
1184 if (count < rp->pattern[1]) {
1185 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001186 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001187 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001188 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001189 if (i)
1190 return i;
1191 rp->count = count-1;
1192 state->ptr = ptr;
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001193 LASTMARK_RESTORE();
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001194 return 0;
1195 }
1196
1197 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001198 state->repeat = rp->prev;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001199 i = SRE_MATCH(state, pattern, level + 1);
1200 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001201 return i;
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001202
Fredrik Lundh770617b2001-01-14 15:06:11 +00001203 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001204 state->repeat = rp;
1205
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001206 LASTMARK_RESTORE();
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001207 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1208 return 0;
1209
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001210 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001211 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001212 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001213 if (i)
1214 return i;
1215 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001216 state->ptr = ptr;
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001217 LASTMARK_RESTORE();
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001218 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001219
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001220 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001221 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001222 return SRE_ERROR_ILLEGAL;
1223 }
1224 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001225
Sjoerd Mullender89dfe9e2001-08-30 14:37:07 +00001226 /* can't end up here */
Fredrik Lundh21009b92001-09-18 18:47:09 +00001227 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
Guido van Rossumb700df92000-03-31 14:59:30 +00001228}
1229
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001230LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001231SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1232{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001233 SRE_CHAR* ptr = state->start;
1234 SRE_CHAR* end = state->end;
1235 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001236 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001237 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001238 SRE_CODE* prefix = NULL;
1239 SRE_CODE* charset = NULL;
1240 SRE_CODE* overlap = NULL;
1241 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001242
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001243 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001244 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001245 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001246
1247 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001248
1249 if (pattern[3] > 0) {
1250 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001251 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001252 end -= pattern[3]-1;
1253 if (end <= ptr)
1254 end = ptr+1;
1255 }
1256
Fredrik Lundh3562f112000-07-02 12:00:07 +00001257 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001258 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001259 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001260 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001261 prefix_skip = pattern[6];
1262 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001263 overlap = prefix + prefix_len - 1;
1264 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001265 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001266 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001267 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001268
1269 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001270 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001271
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001272 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1273 TRACE(("charset = %p\n", charset));
1274
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001275#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001276 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001277 /* pattern starts with a known prefix. use the overlap
1278 table to skip forward as fast as we possibly can */
1279 int i = 0;
1280 end = state->end;
1281 while (ptr < end) {
1282 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001283 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001284 if (!i)
1285 break;
1286 else
1287 i = overlap[i];
1288 } else {
1289 if (++i == prefix_len) {
1290 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001291 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1292 state->start = ptr + 1 - prefix_len;
1293 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001294 if (flags & SRE_INFO_LITERAL)
1295 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001296 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001297 if (status != 0)
1298 return status;
1299 /* close but no cigar -- try again */
1300 i = overlap[i];
1301 }
1302 break;
1303 }
1304
1305 }
1306 ptr++;
1307 }
1308 return 0;
1309 }
1310#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001311
Fredrik Lundh3562f112000-07-02 12:00:07 +00001312 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001313 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001314 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001315 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001316 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001317 for (;;) {
1318 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1319 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001320 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001321 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001322 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001323 state->start = ptr;
1324 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001325 if (flags & SRE_INFO_LITERAL)
1326 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001327 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001328 if (status != 0)
1329 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001330 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001331 } else if (charset) {
1332 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001333 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001334 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001335 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001336 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001337 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001338 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001339 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001340 state->start = ptr;
1341 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001342 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001343 if (status != 0)
1344 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001345 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001346 }
1347 } else
1348 /* general case */
1349 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001350 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001351 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001352 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001353 if (status != 0)
1354 break;
1355 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001357 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001358}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001359
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001360LOCAL(int)
1361SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1362{
1363 /* check if given string is a literal template (i.e. no escapes) */
1364 while (len-- > 0)
1365 if (*ptr++ == '\\')
1366 return 0;
1367 return 1;
1368}
Guido van Rossumb700df92000-03-31 14:59:30 +00001369
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001370#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001371
1372/* -------------------------------------------------------------------- */
1373/* factories and destructors */
1374
1375/* see sre.h for object declarations */
1376
/* forward declarations: the factories below hand these type objects
   to PyObject_NEW / PyObject_NEW_VAR before the types are defined */
static PyTypeObject Pattern_Type;
static PyTypeObject Match_Type;
static PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001380
1381static PyObject *
1382_compile(PyObject* self_, PyObject* args)
1383{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001384 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001385
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001386 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001387 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001389 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001390 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001391 PyObject* code;
1392 int groups = 0;
1393 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001394 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001395 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1396 &PyList_Type, &code, &groups,
1397 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001398 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001399
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001400 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001401
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001402 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001403 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001404 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001405
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001406 self->codesize = n;
1407
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001408 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001409 PyObject *o = PyList_GET_ITEM(code, i);
Martin v. Löwis78e2f062003-04-19 12:56:08 +00001410 if (PyInt_Check(o))
1411 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
1412 else
1413 self->code[i] = (SRE_CODE) PyLong_AsUnsignedLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001414 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001415
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001416 if (PyErr_Occurred()) {
1417 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001418 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001419 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001420
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001421 Py_INCREF(pattern);
1422 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001423
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001424 self->flags = flags;
1425
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001426 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001427
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001428 Py_XINCREF(groupindex);
1429 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001430
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001431 Py_XINCREF(indexgroup);
1432 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001433
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001434 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001435}
1436
1437static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001438sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001439{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001440 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001441}
1442
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001443static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001444sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001445{
1446 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001447 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001448 return NULL;
1449 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001450 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001451 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001452#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001453 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001454#else
1455 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001456#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001457 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001458}
1459
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001460LOCAL(void)
1461state_reset(SRE_STATE* state)
1462{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001463 state->lastmark = 0;
1464
1465 /* FIXME: dynamic! */
Neal Norwitz35fc7602002-06-13 21:11:11 +00001466 memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001467
1468 state->lastindex = -1;
1469
1470 state->repeat = NULL;
1471
1472 mark_fini(state);
1473}
1474
static void*
getstring(PyObject* string, int* p_length, int* p_charsize)
{
    /* given a python object, return a data pointer, a length (in
       characters), and a character size.  return NULL if the object
       is not a string (or not compatible).

       on success, *p_length and *p_charsize are filled in.  on the
       failure paths below a TypeError is set and the output
       parameters are left untouched. */

    PyBufferProcs *buffer;
    int size, bytes, charsize;
    void* ptr;

#if defined(HAVE_UNICODE)
    if (PyUnicode_Check(string)) {
        /* unicode strings don't always support the buffer interface,
           so read the data directly from the object */
        ptr = (void*) PyUnicode_AS_DATA(string);
        bytes = PyUnicode_GET_DATA_SIZE(string);
        size = PyUnicode_GET_SIZE(string);
        charsize = sizeof(Py_UNICODE);

    } else {
#endif

    /* get pointer to string buffer; only objects exposing exactly
       one readable segment are accepted */
    buffer = string->ob_type->tp_as_buffer;
    if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
        buffer->bf_getsegcount(string, NULL) != 1) {
        PyErr_SetString(PyExc_TypeError, "expected string or buffer");
        return NULL;
    }

    /* determine buffer size */
    bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
    if (bytes < 0) {
        PyErr_SetString(PyExc_TypeError, "buffer has negative size");
        return NULL;
    }

    /* determine character size */
#if PY_VERSION_HEX >= 0x01060000
    size = PyObject_Size(string);
#else
    size = PyObject_Length(string);
#endif

    /* the character size is deduced by comparing the buffer size (in
       bytes) with the object's length (in items) */
    if (PyString_Check(string) || bytes == size)
        charsize = 1;
#if defined(HAVE_UNICODE)
    else if (bytes == (int) (size * sizeof(Py_UNICODE)))
        charsize = sizeof(Py_UNICODE);
#endif
    else {
        PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
        return NULL;
    }

#if defined(HAVE_UNICODE)
    }
#endif

    *p_length = size;
    *p_charsize = charsize;

    return ptr;
}
1539
1540LOCAL(PyObject*)
1541state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1542 int start, int end)
1543{
1544 /* prepare state object */
1545
1546 int length;
1547 int charsize;
1548 void* ptr;
1549
1550 memset(state, 0, sizeof(SRE_STATE));
1551
1552 state->lastindex = -1;
1553
1554 ptr = getstring(string, &length, &charsize);
1555 if (!ptr)
1556 return NULL;
1557
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001558 /* adjust boundaries */
1559 if (start < 0)
1560 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001561 else if (start > length)
1562 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001563
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001564 if (end < 0)
1565 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001566 else if (end > length)
1567 end = length;
1568
1569 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001570
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001571 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001572
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001573 state->start = (void*) ((char*) ptr + start * state->charsize);
1574 state->end = (void*) ((char*) ptr + end * state->charsize);
1575
1576 Py_INCREF(string);
1577 state->string = string;
1578 state->pos = start;
1579 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001580
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001581 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001582 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001583 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001584#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001585 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001586#else
1587 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001588#endif
1589 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001590 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001591
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001592 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001593}
1594
LOCAL(void)
state_fini(SRE_STATE* state)
{
    /* release what the state holds: the string reference taken by
       state_init (may be NULL if state_init failed after zeroing the
       state), and whatever mark_fini cleans up */
    Py_XDECREF(state->string);
    mark_fini(state);
}
1601
/* calculate offset from start of string: converts a pointer into the
   target buffer to a character index, by dividing the byte distance
   from state->beginning by the character size */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1605
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001606LOCAL(PyObject*)
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001607state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001608{
Fredrik Lundh58100642000-08-09 09:14:35 +00001609 int i, j;
1610
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001611 index = (index - 1) * 2;
1612
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001613 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001614 if (empty)
1615 /* want empty string */
1616 i = j = 0;
1617 else {
1618 Py_INCREF(Py_None);
1619 return Py_None;
1620 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001621 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001622 i = STATE_OFFSET(state, state->mark[index]);
1623 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001624 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001625
Fredrik Lundh58100642000-08-09 09:14:35 +00001626 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001627}
1628
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001629static void
1630pattern_error(int status)
1631{
1632 switch (status) {
1633 case SRE_ERROR_RECURSION_LIMIT:
1634 PyErr_SetString(
1635 PyExc_RuntimeError,
1636 "maximum recursion limit exceeded"
1637 );
1638 break;
1639 case SRE_ERROR_MEMORY:
1640 PyErr_NoMemory();
1641 break;
1642 default:
1643 /* other error codes indicate compiler/engine bugs */
1644 PyErr_SetString(
1645 PyExc_RuntimeError,
1646 "internal error in regular expression engine"
1647 );
1648 }
1649}
1650
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001651static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001652pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001653{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001654 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001655
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001656 MatchObject* match;
1657 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001658 char* base;
1659 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001660
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001661 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001662
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001663 /* create match object (with room for extra group marks) */
1664 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001665 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001666 if (!match)
1667 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001668
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001669 Py_INCREF(pattern);
1670 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001671
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001672 Py_INCREF(state->string);
1673 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001674
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001675 match->regs = NULL;
1676 match->groups = pattern->groups+1;
1677
1678 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001679
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001680 base = (char*) state->beginning;
1681 n = state->charsize;
1682
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001683 match->mark[0] = ((char*) state->start - base) / n;
1684 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001685
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001686 for (i = j = 0; i < pattern->groups; i++, j+=2)
1687 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1688 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1689 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1690 } else
1691 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1692
1693 match->pos = state->pos;
1694 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001695
Fredrik Lundh6f013982000-07-03 18:44:21 +00001696 match->lastindex = state->lastindex;
1697
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001698 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001699
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001700 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001701
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001702 /* no match */
1703 Py_INCREF(Py_None);
1704 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001705
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001706 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001707
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001708 /* internal error */
1709 pattern_error(status);
1710 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001711}
1712
1713static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001714pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001715{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001716 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001717
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001718 ScannerObject* self;
1719
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001720 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001721 int start = 0;
1722 int end = INT_MAX;
1723 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1724 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001725
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001726 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001727 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001728 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001729 return NULL;
1730
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001731 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001732 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001733 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001734 return NULL;
1735 }
1736
1737 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001738 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001739
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001740 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001741}
1742
static void
pattern_dealloc(PatternObject* self)
{
    /* drop the references stored by _compile (groupindex and
       indexgroup may be NULL), then free the object itself */
    Py_XDECREF(self->pattern);
    Py_XDECREF(self->groupindex);
    Py_XDECREF(self->indexgroup);
    PyObject_DEL(self);
}
1751
1752static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001753pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001754{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001755 SRE_STATE state;
1756 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001757
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001758 PyObject* string;
1759 int start = 0;
1760 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001761 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1762 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1763 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001764 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001765
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001766 string = state_init(&state, self, string, start, end);
1767 if (!string)
1768 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001769
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001770 state.ptr = state.start;
1771
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001772 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1773
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001774 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001775 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001776 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001777#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001778 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001779#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001780 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001781
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001782 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1783
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001784 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001785
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001786 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001787}
1788
1789static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001790pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001791{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001792 SRE_STATE state;
1793 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001794
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001795 PyObject* string;
1796 int start = 0;
1797 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001798 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1799 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1800 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001801 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001802
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001803 string = state_init(&state, self, string, start, end);
1804 if (!string)
1805 return NULL;
1806
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001807 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1808
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001809 if (state.charsize == 1) {
1810 status = sre_search(&state, PatternObject_GetCode(self));
1811 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001812#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001813 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001814#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001815 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001816
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001817 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1818
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001819 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001820
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001821 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001822}
1823
1824static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001825call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001826{
1827 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001828 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001829 PyObject* func;
1830 PyObject* result;
1831
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001832 if (!args)
1833 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001834 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001835 if (!name)
1836 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001837 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001838 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001839 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001840 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001841 func = PyObject_GetAttrString(mod, function);
1842 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001843 if (!func)
1844 return NULL;
1845 result = PyObject_CallObject(func, args);
1846 Py_DECREF(func);
1847 Py_DECREF(args);
1848 return result;
1849}
1850
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object with copy.deepcopy(*object, memo).  returns 1
       on success; on failure returns 0 and leaves *object alone. */

    PyObject* clone = call(
        "copy", "deepcopy",
        Py_BuildValue("OO", *object, memo)
        );

    if (clone) {
        /* swap in the copy, releasing the original */
        Py_DECREF(*object);
        *object = clone;
        return 1; /* success */
    }

    return 0;
}
#endif
1870
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001871static PyObject*
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00001872join_list(PyObject* list, PyObject* pattern)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001873{
1874 /* join list elements */
1875
1876 PyObject* joiner;
1877#if PY_VERSION_HEX >= 0x01060000
1878 PyObject* function;
1879 PyObject* args;
1880#endif
1881 PyObject* result;
1882
1883 switch (PyList_GET_SIZE(list)) {
1884 case 0:
1885 Py_DECREF(list);
Fredrik Lundh09705f02002-11-22 12:46:35 +00001886 return PySequence_GetSlice(pattern, 0, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001887 case 1:
1888 result = PyList_GET_ITEM(list, 0);
1889 Py_INCREF(result);
1890 Py_DECREF(list);
1891 return result;
1892 }
1893
1894 /* two or more elements: slice out a suitable separator from the
1895 first member, and use that to join the entire list */
1896
1897 joiner = PySequence_GetSlice(pattern, 0, 0);
1898 if (!joiner)
1899 return NULL;
1900
1901#if PY_VERSION_HEX >= 0x01060000
1902 function = PyObject_GetAttrString(joiner, "join");
1903 if (!function) {
1904 Py_DECREF(joiner);
1905 return NULL;
1906 }
1907 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001908 if (!args) {
1909 Py_DECREF(function);
1910 Py_DECREF(joiner);
1911 return NULL;
1912 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001913 PyTuple_SET_ITEM(args, 0, list);
1914 result = PyObject_CallObject(function, args);
1915 Py_DECREF(args); /* also removes list */
1916 Py_DECREF(function);
1917#else
1918 result = call(
1919 "string", "join",
1920 Py_BuildValue("OO", list, joiner)
1921 );
1922#endif
1923 Py_DECREF(joiner);
1924
1925 return result;
1926}
1927
static PyObject*
pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
{
    /* return a list with one item per match found in the given
       string (optionally restricted by pos/endpos).  the item is the
       matched slice if the pattern has no groups, the slice of group
       1 if it has exactly one group, and otherwise a tuple with one
       slice per group.  returns NULL with an exception set on
       failure. */

    SRE_STATE state;
    PyObject* list;
    int status;
    int i, b, e;

    PyObject* string;
    int start = 0;
    int end = INT_MAX;
    static char* kwlist[] = { "source", "pos", "endpos", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
                                     &string, &start, &end))
        return NULL;

    string = state_init(&state, self, string, start, end);
    if (!string)
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    /* search repeatedly, each time starting where the previous
       iteration left state.start */
    while (state.start <= state.end) {

        PyObject* item;

        state_reset(&state);

        state.ptr = state.start;

        if (state.charsize == 1) {
            status = sre_search(&state, PatternObject_GetCode(self));
        } else {
#if defined(HAVE_UNICODE)
            status = sre_usearch(&state, PatternObject_GetCode(self));
#endif
        }

        if (status <= 0) {
            /* 0 means no (more) matches; negative is an engine error */
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* don't bother to build a match object */
        switch (self->groups) {
        case 0:
            /* no groups: the item is the entire match */
            b = STATE_OFFSET(&state, state.start);
            e = STATE_OFFSET(&state, state.ptr);
            item = PySequence_GetSlice(string, b, e);
            if (!item)
                goto error;
            break;
        case 1:
            /* one group: the item is that group (an empty slice is
               requested for a group without marks) */
            item = state_getslice(&state, 1, string, 1);
            if (!item)
                goto error;
            break;
        default:
            /* several groups: the item is a tuple of group slices */
            item = PyTuple_New(self->groups);
            if (!item)
                goto error;
            for (i = 0; i < self->groups; i++) {
                PyObject* o = state_getslice(&state, i+1, string, 1);
                if (!o) {
                    Py_DECREF(item);
                    goto error;
                }
                PyTuple_SET_ITEM(item, i, o);
            }
            break;
        }

        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* continue after this match; after an empty match, step one
           character ahead so the loop makes progress */
        if (state.ptr == state.start)
            state.start = (void*) ((char*) state.ptr + state.charsize);
        else
            state.start = state.ptr;

    }

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}
2027
#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    /* return an iterator that calls a fresh scanner's "search" method
       until it returns None.  returns NULL with an exception set on
       failure. */

    PyObject* scanner;
    PyObject* search;
    PyObject* iterator = NULL;

    scanner = pattern_scanner(pattern, args);
    if (scanner) {
        search = PyObject_GetAttrString(scanner, "search");
        /* our scanner reference is no longer needed once the
           attribute lookup is done */
        Py_DECREF(scanner);
        if (search) {
            iterator = PyCallIter_New(search, Py_None);
            Py_DECREF(search);
        }
    }

    return iterator;
}
#endif
2051
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002052static PyObject*
2053pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2054{
2055 SRE_STATE state;
2056 PyObject* list;
2057 PyObject* item;
2058 int status;
2059 int n;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002060 int i;
2061 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002062
2063 PyObject* string;
2064 int maxsplit = 0;
2065 static char* kwlist[] = { "source", "maxsplit", NULL };
2066 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
2067 &string, &maxsplit))
2068 return NULL;
2069
2070 string = state_init(&state, self, string, 0, INT_MAX);
2071 if (!string)
2072 return NULL;
2073
2074 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002075 if (!list) {
2076 state_fini(&state);
2077 return NULL;
2078 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002079
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002080 n = 0;
2081 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002082
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002083 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002084
2085 state_reset(&state);
2086
2087 state.ptr = state.start;
2088
2089 if (state.charsize == 1) {
2090 status = sre_search(&state, PatternObject_GetCode(self));
2091 } else {
2092#if defined(HAVE_UNICODE)
2093 status = sre_usearch(&state, PatternObject_GetCode(self));
2094#endif
2095 }
2096
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002097 if (status <= 0) {
2098 if (status == 0)
2099 break;
2100 pattern_error(status);
2101 goto error;
2102 }
2103
2104 if (state.start == state.ptr) {
2105 if (last == state.end)
2106 break;
2107 /* skip one character */
2108 state.start = (void*) ((char*) state.ptr + state.charsize);
2109 continue;
2110 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002111
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002112 /* get segment before this match */
2113 item = PySequence_GetSlice(
2114 string, STATE_OFFSET(&state, last),
2115 STATE_OFFSET(&state, state.start)
2116 );
2117 if (!item)
2118 goto error;
2119 status = PyList_Append(list, item);
2120 Py_DECREF(item);
2121 if (status < 0)
2122 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002123
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002124 /* add groups (if any) */
2125 for (i = 0; i < self->groups; i++) {
2126 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002127 if (!item)
2128 goto error;
2129 status = PyList_Append(list, item);
2130 Py_DECREF(item);
2131 if (status < 0)
2132 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002133 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002134
2135 n = n + 1;
2136
2137 last = state.start = state.ptr;
2138
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002139 }
2140
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002141 /* get segment following last match (even if empty) */
2142 item = PySequence_GetSlice(
2143 string, STATE_OFFSET(&state, last), state.endpos
2144 );
2145 if (!item)
2146 goto error;
2147 status = PyList_Append(list, item);
2148 Py_DECREF(item);
2149 if (status < 0)
2150 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002151
2152 state_fini(&state);
2153 return list;
2154
2155error:
2156 Py_DECREF(list);
2157 state_fini(&state);
2158 return NULL;
2159
2160}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002161
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002162static PyObject*
2163pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2164 int count, int subn)
2165{
2166 SRE_STATE state;
2167 PyObject* list;
2168 PyObject* item;
2169 PyObject* filter;
2170 PyObject* args;
2171 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002172 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002173 int status;
2174 int n;
2175 int i, b, e;
2176 int filter_is_callable;
2177
Fredrik Lundhdac58492001-10-21 21:48:30 +00002178 if (PyCallable_Check(template)) {
2179 /* sub/subn takes either a function or a template */
2180 filter = template;
2181 Py_INCREF(filter);
2182 filter_is_callable = 1;
2183 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002184 /* if not callable, check if it's a literal string */
2185 int literal;
2186 ptr = getstring(template, &n, &b);
2187 if (ptr) {
2188 if (b == 1) {
2189 literal = sre_literal_template(ptr, n);
2190 } else {
2191#if defined(HAVE_UNICODE)
2192 literal = sre_uliteral_template(ptr, n);
2193#endif
2194 }
2195 } else {
2196 PyErr_Clear();
2197 literal = 0;
2198 }
2199 if (literal) {
2200 filter = template;
2201 Py_INCREF(filter);
2202 filter_is_callable = 0;
2203 } else {
2204 /* not a literal; hand it over to the template compiler */
2205 filter = call(
2206 SRE_MODULE, "_subx",
2207 Py_BuildValue("OO", self, template)
2208 );
2209 if (!filter)
2210 return NULL;
2211 filter_is_callable = PyCallable_Check(filter);
2212 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002213 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002214
2215 string = state_init(&state, self, string, 0, INT_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002216 if (!string) {
2217 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002218 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002219 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002220
2221 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002222 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002223 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002224 state_fini(&state);
2225 return NULL;
2226 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002227
2228 n = i = 0;
2229
2230 while (!count || n < count) {
2231
2232 state_reset(&state);
2233
2234 state.ptr = state.start;
2235
2236 if (state.charsize == 1) {
2237 status = sre_search(&state, PatternObject_GetCode(self));
2238 } else {
2239#if defined(HAVE_UNICODE)
2240 status = sre_usearch(&state, PatternObject_GetCode(self));
2241#endif
2242 }
2243
2244 if (status <= 0) {
2245 if (status == 0)
2246 break;
2247 pattern_error(status);
2248 goto error;
2249 }
2250
2251 b = STATE_OFFSET(&state, state.start);
2252 e = STATE_OFFSET(&state, state.ptr);
2253
2254 if (i < b) {
2255 /* get segment before this match */
2256 item = PySequence_GetSlice(string, i, b);
2257 if (!item)
2258 goto error;
2259 status = PyList_Append(list, item);
2260 Py_DECREF(item);
2261 if (status < 0)
2262 goto error;
2263
2264 } else if (i == b && i == e && n > 0)
2265 /* ignore empty match on latest position */
2266 goto next;
2267
2268 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002269 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002270 match = pattern_new_match(self, &state, 1);
2271 if (!match)
2272 goto error;
2273 args = Py_BuildValue("(O)", match);
2274 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002275 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002276 goto error;
2277 }
2278 item = PyObject_CallObject(filter, args);
2279 Py_DECREF(args);
2280 Py_DECREF(match);
2281 if (!item)
2282 goto error;
2283 } else {
2284 /* filter is literal string */
2285 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002286 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002287 }
2288
2289 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002290 if (item != Py_None) {
2291 status = PyList_Append(list, item);
2292 Py_DECREF(item);
2293 if (status < 0)
2294 goto error;
2295 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002296
2297 i = e;
2298 n = n + 1;
2299
2300next:
2301 /* move on */
2302 if (state.ptr == state.start)
2303 state.start = (void*) ((char*) state.ptr + state.charsize);
2304 else
2305 state.start = state.ptr;
2306
2307 }
2308
2309 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002310 if (i < state.endpos) {
2311 item = PySequence_GetSlice(string, i, state.endpos);
2312 if (!item)
2313 goto error;
2314 status = PyList_Append(list, item);
2315 Py_DECREF(item);
2316 if (status < 0)
2317 goto error;
2318 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002319
2320 state_fini(&state);
2321
Guido van Rossum4e173842001-12-07 04:25:10 +00002322 Py_DECREF(filter);
2323
Fredrik Lundhdac58492001-10-21 21:48:30 +00002324 /* convert list to single string (also removes list) */
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00002325 item = join_list(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002326
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002327 if (!item)
2328 return NULL;
2329
2330 if (subn)
2331 return Py_BuildValue("Ni", item, n);
2332
2333 return item;
2334
2335error:
2336 Py_DECREF(list);
2337 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002338 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002339 return NULL;
2340
2341}
2342
2343static PyObject*
2344pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2345{
2346 PyObject* template;
2347 PyObject* string;
2348 int count = 0;
2349 static char* kwlist[] = { "repl", "string", "count", NULL };
2350 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2351 &template, &string, &count))
2352 return NULL;
2353
2354 return pattern_subx(self, template, string, count, 0);
2355}
2356
2357static PyObject*
2358pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2359{
2360 PyObject* template;
2361 PyObject* string;
2362 int count = 0;
2363 static char* kwlist[] = { "repl", "string", "count", NULL };
2364 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2365 &template, &string, &count))
2366 return NULL;
2367
2368 return pattern_subx(self, template, string, count, 1);
2369}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002370
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002371static PyObject*
2372pattern_copy(PatternObject* self, PyObject* args)
2373{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002374#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002375 PatternObject* copy;
2376 int offset;
2377
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002378 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2379 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002380
2381 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2382 if (!copy)
2383 return NULL;
2384
2385 offset = offsetof(PatternObject, groups);
2386
2387 Py_XINCREF(self->groupindex);
2388 Py_XINCREF(self->indexgroup);
2389 Py_XINCREF(self->pattern);
2390
2391 memcpy((char*) copy + offset, (char*) self + offset,
2392 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2393
2394 return (PyObject*) copy;
2395#else
2396 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2397 return NULL;
2398#endif
2399}
2400
2401static PyObject*
2402pattern_deepcopy(PatternObject* self, PyObject* args)
2403{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002404#ifdef USE_BUILTIN_COPY
2405 PatternObject* copy;
2406
2407 PyObject* memo;
2408 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2409 return NULL;
2410
2411 copy = (PatternObject*) pattern_copy(self, Py_None);
2412 if (!copy)
2413 return NULL;
2414
2415 if (!deepcopy(&copy->groupindex, memo) ||
2416 !deepcopy(&copy->indexgroup, memo) ||
2417 !deepcopy(&copy->pattern, memo)) {
2418 Py_DECREF(copy);
2419 return NULL;
2420 }
2421
2422#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002423 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2424 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002425#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002426}
2427
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002428static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002429 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2430 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2431 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2432 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2433 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2434 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002435#if PY_VERSION_HEX >= 0x02020000
2436 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS},
2437#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002438 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002439 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2440 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002441 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002442};
2443
2444static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002445pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002446{
2447 PyObject* res;
2448
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002449 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002450
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002451 if (res)
2452 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002453
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002454 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002455
2456 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002457 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002458 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002459 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002460 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002461
2462 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002463 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002464
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002465 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002466 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002467
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002468 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002469 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002470 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002471 }
2472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002473 PyErr_SetString(PyExc_AttributeError, name);
2474 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002475}
2476
2477statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002478 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002479 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002480 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002481 (destructor)pattern_dealloc, /*tp_dealloc*/
2482 0, /*tp_print*/
2483 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002484};
2485
2486/* -------------------------------------------------------------------- */
2487/* match methods */
2488
2489static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002490match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002491{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002492 Py_XDECREF(self->regs);
2493 Py_XDECREF(self->string);
2494 Py_DECREF(self->pattern);
2495 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002496}
2497
2498static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002499match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002500{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002501 if (index < 0 || index >= self->groups) {
2502 /* raise IndexError if we were given a bad group number */
2503 PyErr_SetString(
2504 PyExc_IndexError,
2505 "no such group"
2506 );
2507 return NULL;
2508 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002509
Fredrik Lundh6f013982000-07-03 18:44:21 +00002510 index *= 2;
2511
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002512 if (self->string == Py_None || self->mark[index] < 0) {
2513 /* return default value if the string or group is undefined */
2514 Py_INCREF(def);
2515 return def;
2516 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002517
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002518 return PySequence_GetSlice(
2519 self->string, self->mark[index], self->mark[index+1]
2520 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002521}
2522
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002523static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002524match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002525{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002526 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002527
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002528 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002529 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002530
Fredrik Lundh6f013982000-07-03 18:44:21 +00002531 i = -1;
2532
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002533 if (self->pattern->groupindex) {
2534 index = PyObject_GetItem(self->pattern->groupindex, index);
2535 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002536 if (PyInt_Check(index))
2537 i = (int) PyInt_AS_LONG(index);
2538 Py_DECREF(index);
2539 } else
2540 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002541 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002542
2543 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002544}
2545
2546static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002547match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002548{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002549 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002550}
2551
2552static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002553match_expand(MatchObject* self, PyObject* args)
2554{
2555 PyObject* template;
2556 if (!PyArg_ParseTuple(args, "O:expand", &template))
2557 return NULL;
2558
2559 /* delegate to Python code */
2560 return call(
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002561 SRE_MODULE, "_expand",
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002562 Py_BuildValue("OOO", self->pattern, self, template)
2563 );
2564}
2565
2566static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002567match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002568{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002569 PyObject* result;
2570 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002571
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002572 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002573
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002574 switch (size) {
2575 case 0:
2576 result = match_getslice(self, Py_False, Py_None);
2577 break;
2578 case 1:
2579 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2580 break;
2581 default:
2582 /* fetch multiple items */
2583 result = PyTuple_New(size);
2584 if (!result)
2585 return NULL;
2586 for (i = 0; i < size; i++) {
2587 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002588 self, PyTuple_GET_ITEM(args, i), Py_None
2589 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002590 if (!item) {
2591 Py_DECREF(result);
2592 return NULL;
2593 }
2594 PyTuple_SET_ITEM(result, i, item);
2595 }
2596 break;
2597 }
2598 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002599}
2600
2601static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002602match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002603{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002604 PyObject* result;
2605 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002606
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002607 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002608 static char* kwlist[] = { "default", NULL };
2609 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002610 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002611
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002612 result = PyTuple_New(self->groups-1);
2613 if (!result)
2614 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002615
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002616 for (index = 1; index < self->groups; index++) {
2617 PyObject* item;
2618 item = match_getslice_by_index(self, index, def);
2619 if (!item) {
2620 Py_DECREF(result);
2621 return NULL;
2622 }
2623 PyTuple_SET_ITEM(result, index-1, item);
2624 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002625
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002626 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002627}
2628
2629static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002630match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002631{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002632 PyObject* result;
2633 PyObject* keys;
2634 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002635
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002636 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002637 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002638 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002639 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002640
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002641 result = PyDict_New();
2642 if (!result || !self->pattern->groupindex)
2643 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002644
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002645 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002646 if (!keys)
2647 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002648
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002649 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002650 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002651 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002652 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002653 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002654 if (!key)
2655 goto failed;
2656 value = match_getslice(self, key, def);
2657 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002658 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002659 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002660 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002661 status = PyDict_SetItem(result, key, value);
2662 Py_DECREF(value);
2663 if (status < 0)
2664 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002665 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002666
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002667 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002668
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002669 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002670
2671failed:
2672 Py_DECREF(keys);
2673 Py_DECREF(result);
2674 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002675}
2676
2677static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002678match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002679{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002680 int index;
2681
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002682 PyObject* index_ = Py_False; /* zero */
2683 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2684 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002685
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002686 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002687
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002688 if (index < 0 || index >= self->groups) {
2689 PyErr_SetString(
2690 PyExc_IndexError,
2691 "no such group"
2692 );
2693 return NULL;
2694 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002695
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002696 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002697 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002698}
2699
2700static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002701match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002702{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002703 int index;
2704
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002705 PyObject* index_ = Py_False; /* zero */
2706 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2707 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002708
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002709 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002710
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002711 if (index < 0 || index >= self->groups) {
2712 PyErr_SetString(
2713 PyExc_IndexError,
2714 "no such group"
2715 );
2716 return NULL;
2717 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002718
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002719 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002720 return Py_BuildValue("i", self->mark[index*2+1]);
2721}
2722
2723LOCAL(PyObject*)
2724_pair(int i1, int i2)
2725{
2726 PyObject* pair;
2727 PyObject* item;
2728
2729 pair = PyTuple_New(2);
2730 if (!pair)
2731 return NULL;
2732
2733 item = PyInt_FromLong(i1);
2734 if (!item)
2735 goto error;
2736 PyTuple_SET_ITEM(pair, 0, item);
2737
2738 item = PyInt_FromLong(i2);
2739 if (!item)
2740 goto error;
2741 PyTuple_SET_ITEM(pair, 1, item);
2742
2743 return pair;
2744
2745 error:
2746 Py_DECREF(pair);
2747 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002748}
2749
2750static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002751match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002752{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002753 int index;
2754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002755 PyObject* index_ = Py_False; /* zero */
2756 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2757 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002758
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002759 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002760
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002761 if (index < 0 || index >= self->groups) {
2762 PyErr_SetString(
2763 PyExc_IndexError,
2764 "no such group"
2765 );
2766 return NULL;
2767 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002768
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002769 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002770 return _pair(self->mark[index*2], self->mark[index*2+1]);
2771}
2772
2773static PyObject*
2774match_regs(MatchObject* self)
2775{
2776 PyObject* regs;
2777 PyObject* item;
2778 int index;
2779
2780 regs = PyTuple_New(self->groups);
2781 if (!regs)
2782 return NULL;
2783
2784 for (index = 0; index < self->groups; index++) {
2785 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2786 if (!item) {
2787 Py_DECREF(regs);
2788 return NULL;
2789 }
2790 PyTuple_SET_ITEM(regs, index, item);
2791 }
2792
2793 Py_INCREF(regs);
2794 self->regs = regs;
2795
2796 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002797}
2798
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002799static PyObject*
2800match_copy(MatchObject* self, PyObject* args)
2801{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002802#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002803 MatchObject* copy;
2804 int slots, offset;
2805
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002806 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2807 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002808
2809 slots = 2 * (self->pattern->groups+1);
2810
2811 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2812 if (!copy)
2813 return NULL;
2814
2815 /* this value a constant, but any compiler should be able to
2816 figure that out all by itself */
2817 offset = offsetof(MatchObject, string);
2818
2819 Py_XINCREF(self->pattern);
2820 Py_XINCREF(self->string);
2821 Py_XINCREF(self->regs);
2822
2823 memcpy((char*) copy + offset, (char*) self + offset,
2824 sizeof(MatchObject) + slots * sizeof(int) - offset);
2825
2826 return (PyObject*) copy;
2827#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002828 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002829 return NULL;
2830#endif
2831}
2832
2833static PyObject*
2834match_deepcopy(MatchObject* self, PyObject* args)
2835{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002836#ifdef USE_BUILTIN_COPY
2837 MatchObject* copy;
2838
2839 PyObject* memo;
2840 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2841 return NULL;
2842
2843 copy = (MatchObject*) match_copy(self, Py_None);
2844 if (!copy)
2845 return NULL;
2846
2847 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2848 !deepcopy(&copy->string, memo) ||
2849 !deepcopy(&copy->regs, memo)) {
2850 Py_DECREF(copy);
2851 return NULL;
2852 }
2853
2854#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002855 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2856 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002857#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002858}
2859
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002860static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002861 {"group", (PyCFunction) match_group, METH_VARARGS},
2862 {"start", (PyCFunction) match_start, METH_VARARGS},
2863 {"end", (PyCFunction) match_end, METH_VARARGS},
2864 {"span", (PyCFunction) match_span, METH_VARARGS},
2865 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2866 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2867 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002868 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2869 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002870 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002871};
2872
2873static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002874match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002875{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002876 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002877
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002878 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2879 if (res)
2880 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002881
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002882 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002883
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002884 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002885 if (self->lastindex >= 0)
2886 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002887 Py_INCREF(Py_None);
2888 return Py_None;
2889 }
2890
2891 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002892 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002893 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002894 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002895 );
2896 if (result)
2897 return result;
2898 PyErr_Clear();
2899 }
2900 Py_INCREF(Py_None);
2901 return Py_None;
2902 }
2903
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002904 if (!strcmp(name, "string")) {
2905 if (self->string) {
2906 Py_INCREF(self->string);
2907 return self->string;
2908 } else {
2909 Py_INCREF(Py_None);
2910 return Py_None;
2911 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002912 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002913
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002914 if (!strcmp(name, "regs")) {
2915 if (self->regs) {
2916 Py_INCREF(self->regs);
2917 return self->regs;
2918 } else
2919 return match_regs(self);
2920 }
2921
2922 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002923 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002924 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002925 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002926
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002927 if (!strcmp(name, "pos"))
2928 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002929
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002930 if (!strcmp(name, "endpos"))
2931 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002932
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002933 PyErr_SetString(PyExc_AttributeError, name);
2934 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002935}
2936
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2939
2940statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002941 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002942 0, "_" SRE_MODULE ".SRE_Match",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002943 sizeof(MatchObject), sizeof(int),
2944 (destructor)match_dealloc, /*tp_dealloc*/
2945 0, /*tp_print*/
2946 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002947};
2948
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002949/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002950/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002951
2952static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002953scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002954{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002955 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002956 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002957 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002958}
2959
2960static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002961scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002962{
2963 SRE_STATE* state = &self->state;
2964 PyObject* match;
2965 int status;
2966
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002967 state_reset(state);
2968
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002969 state->ptr = state->start;
2970
2971 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002972 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002973 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002974#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002975 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002976#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002977 }
2978
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002979 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002980 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002981
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00002982 if ((status == 0 || state->ptr == state->start) &&
2983 state->ptr < state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002984 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002985 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002986 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002987
2988 return match;
2989}
2990
2991
2992static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002993scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002994{
2995 SRE_STATE* state = &self->state;
2996 PyObject* match;
2997 int status;
2998
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002999 state_reset(state);
3000
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003001 state->ptr = state->start;
3002
3003 if (state->charsize == 1) {
3004 status = sre_search(state, PatternObject_GetCode(self->pattern));
3005 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003006#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003007 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003008#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003009 }
3010
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003011 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003012 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003013
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00003014 if ((status == 0 || state->ptr == state->start) &&
3015 state->ptr < state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003016 state->start = (void*) ((char*) state->ptr + state->charsize);
3017 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003018 state->start = state->ptr;
3019
3020 return match;
3021}
3022
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003023static PyMethodDef scanner_methods[] = {
Neal Norwitzbb2769f2002-03-31 15:46:00 +00003024 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
3025 /* METH_OLDARGS is not in Python 1.5.2 */
3026 {"match", (PyCFunction) scanner_match, 0},
3027 {"search", (PyCFunction) scanner_search, 0},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003028 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003029};
3030
3031static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003032scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003033{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003034 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003035
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003036 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
3037 if (res)
3038 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003039
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003040 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003041
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003042 /* attributes */
3043 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003044 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003045 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003046 }
3047
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003048 PyErr_SetString(PyExc_AttributeError, name);
3049 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003050}
3051
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003052statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003053 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00003054 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003055 sizeof(ScannerObject), 0,
3056 (destructor)scanner_dealloc, /*tp_dealloc*/
3057 0, /*tp_print*/
3058 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003059};
3060
Guido van Rossumb700df92000-03-31 14:59:30 +00003061static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00003062 {"compile", _compile, METH_VARARGS},
3063 {"getcodesize", sre_codesize, METH_VARARGS},
3064 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003065 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003066};
3067
Mark Hammond8235ea12002-07-19 06:55:41 +00003068PyMODINIT_FUNC init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00003069{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003070 PyObject* m;
3071 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00003072 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003073
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003074 /* Patch object types */
3075 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003076 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00003077
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00003078 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003079 d = PyModule_GetDict(m);
3080
Fredrik Lundh21009b92001-09-18 18:47:09 +00003081 x = PyInt_FromLong(SRE_MAGIC);
3082 if (x) {
3083 PyDict_SetItemString(d, "MAGIC", x);
3084 Py_DECREF(x);
3085 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003086
Martin v. Löwis78e2f062003-04-19 12:56:08 +00003087 x = PyInt_FromLong(sizeof(SRE_CODE));
3088 if (x) {
3089 PyDict_SetItemString(d, "CODESIZE", x);
3090 Py_DECREF(x);
3091 }
3092
Fredrik Lundh21009b92001-09-18 18:47:09 +00003093 x = PyString_FromString(copyright);
3094 if (x) {
3095 PyDict_SetItemString(d, "copyright", x);
3096 Py_DECREF(x);
3097 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003098}
3099
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003100#endif /* !defined(SRE_RECURSIVE) */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00003101
3102/* vim:ts=4:sw=4:et
3103*/