blob: dde365b956b9c4e43f8ae4124b67068517515f7e [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
34
35#ifndef SRE_RECURSIVE
36
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000037static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000038 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000039
40#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000041#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000042
43#include "sre.h"
44
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000045#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000046
Fredrik Lundh436c3d582000-06-29 08:58:44 +000047/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000048#if !defined(SRE_MODULE)
49#define SRE_MODULE "sre"
50#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000051
Guido van Rossumb700df92000-03-31 14:59:30 +000052/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000053#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000054
Fredrik Lundh971e78b2001-10-20 17:48:46 +000055#if PY_VERSION_HEX >= 0x01060000
56#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
Fredrik Lundh22d25462000-07-01 17:50:59 +000057/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000058#define HAVE_UNICODE
59#endif
Fredrik Lundh971e78b2001-10-20 17:48:46 +000060#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
Fredrik Lundh33accc12000-08-27 20:59:47 +000065/* prevent run-away recursion (bad patterns on long strings) */
66
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000067#if !defined(USE_STACKCHECK)
Fredrik Lundh33accc12000-08-27 20:59:47 +000068#if defined(MS_WIN64) || defined(__LP64__) || defined(_LP64)
69/* require smaller recursion limit for a number of 64-bit platforms:
70 Win64 (MS_WIN64), Linux64 (__LP64__), Monterey (64-bit AIX) (_LP64) */
71/* FIXME: maybe the limit should be 40000 / sizeof(void*) ? */
72#define USE_RECURSION_LIMIT 7500
73#else
74#define USE_RECURSION_LIMIT 10000
75#endif
Fredrik Lundh18c2aa22000-08-07 17:33:38 +000076#endif
Fredrik Lundh96ab4652000-08-03 16:29:50 +000077
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000078/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000079#define USE_FAST_SEARCH
80
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000081/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000082#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000083
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000084/* enables copy/deepcopy handling (work in progress) */
85#undef USE_BUILTIN_COPY
86
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000087#if PY_VERSION_HEX < 0x01060000
88#define PyObject_DEL(op) PyMem_DEL((op))
89#endif
90
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091/* -------------------------------------------------------------------- */
92
Fredrik Lundh80946112000-06-29 18:03:25 +000093#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000094#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000095#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000096/* fastest possible local call under MSVC */
97#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000098#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000099#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000100#else
101#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +0000102#endif
103
104/* error codes */
105#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000106#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000107#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +0000108#define SRE_ERROR_MEMORY -9 /* out of memory */
109
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000110#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000111#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000112#else
113#define TRACE(v)
114#endif
115
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000116/* -------------------------------------------------------------------- */
117/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000118
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000119/* default character predicates (run sre_chars.py to regenerate tables) */
120
121#define SRE_DIGIT_MASK 1
122#define SRE_SPACE_MASK 2
123#define SRE_LINEBREAK_MASK 4
124#define SRE_ALNUM_MASK 8
125#define SRE_WORD_MASK 16
126
Fredrik Lundh21009b92001-09-18 18:47:09 +0000127/* FIXME: this assumes ASCII. create tables in init_sre() instead */
128
/* category flags for the 128 ASCII code points, one entry per character.
   each entry is an OR of the SRE_*_MASK bits defined above (digit=1,
   space=2, linebreak=4, alnum=8, word=16).  rows hold 16 entries; the
   leading comment gives the index of the first entry in the row. */
static char sre_char_info[128] = {
    /*   0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /*  16 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  32 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /*  64 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /*  96 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};
136
/* ASCII lower-casing table: entry i is the lower-case form of character
   i.  every entry is the identity except 65..90 ('A'..'Z'), which map to
   97..122 ('a'..'z').  rows hold 16 entries; the leading comment gives
   the index of the first entry in the row. */
static char sre_char_lower[128] = {
    /*   0 */   0,   1,   2,   3,   4,   5,   6,   7,
                8,   9,  10,  11,  12,  13,  14,  15,
    /*  16 */  16,  17,  18,  19,  20,  21,  22,  23,
               24,  25,  26,  27,  28,  29,  30,  31,
    /*  32 */  32,  33,  34,  35,  36,  37,  38,  39,
               40,  41,  42,  43,  44,  45,  46,  47,
    /*  48 */  48,  49,  50,  51,  52,  53,  54,  55,
               56,  57,  58,  59,  60,  61,  62,  63,
    /*  64 */  64,  97,  98,  99, 100, 101, 102, 103,
              104, 105, 106, 107, 108, 109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119,
              120, 121, 122,  91,  92,  93,  94,  95,
    /*  96 */  96,  97,  98,  99, 100, 101, 102, 103,
              104, 105, 106, 107, 108, 109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119,
              120, 121, 122, 123, 124, 125, 126, 127
};
146
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000147#define SRE_IS_DIGIT(ch)\
148 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
149#define SRE_IS_SPACE(ch)\
150 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
151#define SRE_IS_LINEBREAK(ch)\
152 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
153#define SRE_IS_ALNUM(ch)\
154 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
155#define SRE_IS_WORD(ch)\
156 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000157
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000158static unsigned int sre_lower(unsigned int ch)
159{
160 return ((ch) < 128 ? sre_char_lower[ch] : ch);
161}
162
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000163/* locale-specific character predicates */
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000164
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000165#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
166#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
167#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
168#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
169#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
170
/* lower-case ch with the C library's tolower(), which follows the current
   locale; code points from 256 upwards are returned unchanged */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return tolower(ch);
}
175
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000176/* unicode-specific character predicates */
177
178#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000179
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000180#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
181#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
182#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
Fredrik Lundh22d25462000-07-01 17:50:59 +0000183#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000184#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000185
186static unsigned int sre_lower_unicode(unsigned int ch)
187{
188 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
189}
190
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000191#endif
192
Guido van Rossumb700df92000-03-31 14:59:30 +0000193LOCAL(int)
194sre_category(SRE_CODE category, unsigned int ch)
195{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000196 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000197
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000198 case SRE_CATEGORY_DIGIT:
199 return SRE_IS_DIGIT(ch);
200 case SRE_CATEGORY_NOT_DIGIT:
201 return !SRE_IS_DIGIT(ch);
202 case SRE_CATEGORY_SPACE:
203 return SRE_IS_SPACE(ch);
204 case SRE_CATEGORY_NOT_SPACE:
205 return !SRE_IS_SPACE(ch);
206 case SRE_CATEGORY_WORD:
207 return SRE_IS_WORD(ch);
208 case SRE_CATEGORY_NOT_WORD:
209 return !SRE_IS_WORD(ch);
210 case SRE_CATEGORY_LINEBREAK:
211 return SRE_IS_LINEBREAK(ch);
212 case SRE_CATEGORY_NOT_LINEBREAK:
213 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000214
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000215 case SRE_CATEGORY_LOC_WORD:
216 return SRE_LOC_IS_WORD(ch);
217 case SRE_CATEGORY_LOC_NOT_WORD:
218 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000219
220#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000221 case SRE_CATEGORY_UNI_DIGIT:
222 return SRE_UNI_IS_DIGIT(ch);
223 case SRE_CATEGORY_UNI_NOT_DIGIT:
224 return !SRE_UNI_IS_DIGIT(ch);
225 case SRE_CATEGORY_UNI_SPACE:
226 return SRE_UNI_IS_SPACE(ch);
227 case SRE_CATEGORY_UNI_NOT_SPACE:
228 return !SRE_UNI_IS_SPACE(ch);
229 case SRE_CATEGORY_UNI_WORD:
230 return SRE_UNI_IS_WORD(ch);
231 case SRE_CATEGORY_UNI_NOT_WORD:
232 return !SRE_UNI_IS_WORD(ch);
233 case SRE_CATEGORY_UNI_LINEBREAK:
234 return SRE_UNI_IS_LINEBREAK(ch);
235 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
236 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000237#else
238 case SRE_CATEGORY_UNI_DIGIT:
239 return SRE_IS_DIGIT(ch);
240 case SRE_CATEGORY_UNI_NOT_DIGIT:
241 return !SRE_IS_DIGIT(ch);
242 case SRE_CATEGORY_UNI_SPACE:
243 return SRE_IS_SPACE(ch);
244 case SRE_CATEGORY_UNI_NOT_SPACE:
245 return !SRE_IS_SPACE(ch);
246 case SRE_CATEGORY_UNI_WORD:
247 return SRE_LOC_IS_WORD(ch);
248 case SRE_CATEGORY_UNI_NOT_WORD:
249 return !SRE_LOC_IS_WORD(ch);
250 case SRE_CATEGORY_UNI_LINEBREAK:
251 return SRE_IS_LINEBREAK(ch);
252 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
253 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000254#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000255 }
256 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000257}
258
259/* helpers */
260
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000261static void
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000262mark_fini(SRE_STATE* state)
263{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000264 if (state->mark_stack) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000265 free(state->mark_stack);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000266 state->mark_stack = NULL;
267 }
268 state->mark_stack_size = state->mark_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000269}
270
271static int
272mark_save(SRE_STATE* state, int lo, int hi)
273{
274 void* stack;
275 int size;
276 int minsize, newsize;
277
278 if (hi <= lo)
279 return 0;
280
281 size = (hi - lo) + 1;
282
283 newsize = state->mark_stack_size;
284 minsize = state->mark_stack_base + size;
285
286 if (newsize < minsize) {
287 /* create new stack */
288 if (!newsize) {
289 newsize = 512;
290 if (newsize < minsize)
291 newsize = minsize;
292 TRACE(("allocate stack %d\n", newsize));
293 stack = malloc(sizeof(void*) * newsize);
294 } else {
295 /* grow the stack */
296 while (newsize < minsize)
297 newsize += newsize;
298 TRACE(("grow stack to %d\n", newsize));
299 stack = realloc(state->mark_stack, sizeof(void*) * newsize);
300 }
301 if (!stack) {
302 mark_fini(state);
303 return SRE_ERROR_MEMORY;
304 }
305 state->mark_stack = stack;
306 state->mark_stack_size = newsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000307 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000308
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000309 TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000310
311 memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
312 size * sizeof(void*));
313
314 state->mark_stack_base += size;
315
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000316 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000317}
318
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000319static int
320mark_restore(SRE_STATE* state, int lo, int hi)
Guido van Rossumb700df92000-03-31 14:59:30 +0000321{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000322 int size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000323
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000324 if (hi <= lo)
325 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000326
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000327 size = (hi - lo) + 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000328
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000329 state->mark_stack_base -= size;
Guido van Rossumb700df92000-03-31 14:59:30 +0000330
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000331 TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
Guido van Rossumb700df92000-03-31 14:59:30 +0000332
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000333 memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
334 size * sizeof(void*));
Guido van Rossumb700df92000-03-31 14:59:30 +0000335
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000336 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000337}
338
Neal Norwitzaddfe0c2002-11-10 14:33:26 +0000339static void
340lastmark_restore(SRE_STATE *state, int lastmark)
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000341{
342 if (state->lastmark > lastmark) {
343 memset(
344 state->mark + lastmark + 1, 0,
345 (state->lastmark - lastmark) * sizeof(void*)
346 );
347 state->lastmark = lastmark;
348 state->lastindex = (lastmark == 0) ? -1 : (lastmark-1)/2+1;
349 }
350}
351
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000352/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000353
354#define SRE_CHAR unsigned char
355#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000356#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000357#define SRE_CHARSET sre_charset
358#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000359#define SRE_MATCH sre_match
360#define SRE_SEARCH sre_search
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000361#define SRE_LITERAL_TEMPLATE sre_literal_template
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000362
363#if defined(HAVE_UNICODE)
364
Guido van Rossumb700df92000-03-31 14:59:30 +0000365#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000366#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000367#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000368
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000369#undef SRE_LITERAL_TEMPLATE
Guido van Rossumb700df92000-03-31 14:59:30 +0000370#undef SRE_SEARCH
371#undef SRE_MATCH
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000372#undef SRE_INFO
373#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000374#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000375#undef SRE_AT
376#undef SRE_CHAR
377
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000378/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000379
380#define SRE_CHAR Py_UNICODE
381#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000382#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000383#define SRE_CHARSET sre_ucharset
384#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000385#define SRE_MATCH sre_umatch
386#define SRE_SEARCH sre_usearch
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000387#define SRE_LITERAL_TEMPLATE sre_uliteral_template
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000388#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000389
390#endif /* SRE_RECURSIVE */
391
392/* -------------------------------------------------------------------- */
393/* String matching engine */
394
395/* the following section is compiled twice, with different character
396 settings */
397
398LOCAL(int)
399SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
400{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000402
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000403 int this, that;
Guido van Rossumb700df92000-03-31 14:59:30 +0000404
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000405 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000408 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000410
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000411 case SRE_AT_BEGINNING_LINE:
412 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000413 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000414
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000415 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000416 return (((void*) (ptr+1) == state->end &&
417 SRE_IS_LINEBREAK((int) ptr[0])) ||
418 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000420 case SRE_AT_END_LINE:
421 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000422 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000423
Fredrik Lundh770617b2001-01-14 15:06:11 +0000424 case SRE_AT_END_STRING:
425 return ((void*) ptr == state->end);
426
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000427 case SRE_AT_BOUNDARY:
428 if (state->beginning == state->end)
429 return 0;
430 that = ((void*) ptr > state->beginning) ?
431 SRE_IS_WORD((int) ptr[-1]) : 0;
432 this = ((void*) ptr < state->end) ?
433 SRE_IS_WORD((int) ptr[0]) : 0;
434 return this != that;
Fredrik Lundh80946112000-06-29 18:03:25 +0000435
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000436 case SRE_AT_NON_BOUNDARY:
437 if (state->beginning == state->end)
438 return 0;
439 that = ((void*) ptr > state->beginning) ?
440 SRE_IS_WORD((int) ptr[-1]) : 0;
441 this = ((void*) ptr < state->end) ?
442 SRE_IS_WORD((int) ptr[0]) : 0;
443 return this == that;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000444
445 case SRE_AT_LOC_BOUNDARY:
446 if (state->beginning == state->end)
447 return 0;
448 that = ((void*) ptr > state->beginning) ?
449 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
450 this = ((void*) ptr < state->end) ?
451 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
452 return this != that;
453
454 case SRE_AT_LOC_NON_BOUNDARY:
455 if (state->beginning == state->end)
456 return 0;
457 that = ((void*) ptr > state->beginning) ?
458 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
459 this = ((void*) ptr < state->end) ?
460 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
461 return this == that;
462
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000463#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000464 case SRE_AT_UNI_BOUNDARY:
465 if (state->beginning == state->end)
466 return 0;
467 that = ((void*) ptr > state->beginning) ?
468 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
469 this = ((void*) ptr < state->end) ?
470 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
471 return this != that;
472
473 case SRE_AT_UNI_NON_BOUNDARY:
474 if (state->beginning == state->end)
475 return 0;
476 that = ((void*) ptr > state->beginning) ?
477 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
478 this = ((void*) ptr < state->end) ?
479 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
480 return this == that;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000481#endif
482
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000483 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000484
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000485 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000486}
487
488LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000489SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000490{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000491 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000492
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000493 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000494
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000495 for (;;) {
496 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000497
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000498 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000499 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000500 if (ch == set[0])
501 return ok;
502 set++;
503 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000504
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000505 case SRE_OP_RANGE:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000506 /* <RANGE> <lower> <upper> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000507 if (set[0] <= ch && ch <= set[1])
508 return ok;
509 set += 2;
510 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000511
Fredrik Lundh3562f112000-07-02 12:00:07 +0000512 case SRE_OP_CHARSET:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000513 /* <CHARSET> <bitmap> (16 bits per code word) */
Fredrik Lundh3562f112000-07-02 12:00:07 +0000514 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
515 return ok;
516 set += 16;
517 break;
518
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000519 case SRE_OP_BIGCHARSET:
520 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
521 {
522 int count, block;
523 count = *(set++);
524 block = ((unsigned char*)set)[ch >> 8];
525 set += 128;
526 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
527 return ok;
528 set += count*16;
529 break;
530 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000531
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000532 case SRE_OP_CATEGORY:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000533 /* <CATEGORY> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000534 if (sre_category(set[0], (int) ch))
535 return ok;
536 set += 1;
537 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000538
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000539 case SRE_OP_NEGATE:
540 ok = !ok;
541 break;
542
543 case SRE_OP_FAILURE:
544 return !ok;
545
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000546 default:
547 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000548 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000549 return 0;
550 }
551 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000552}
553
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000554LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
555
556LOCAL(int)
557SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
558{
559 SRE_CODE chr;
560 SRE_CHAR* ptr = state->ptr;
561 SRE_CHAR* end = state->end;
562 int i;
563
564 /* adjust end */
565 if (maxcount < end - ptr && maxcount != 65535)
566 end = ptr + maxcount;
567
568 switch (pattern[0]) {
569
570 case SRE_OP_ANY:
571 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000572 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000573 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
574 ptr++;
575 break;
576
577 case SRE_OP_ANY_ALL:
578 /* repeated dot wildcare. skip to the end of the target
579 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000581 ptr = end;
582 break;
583
584 case SRE_OP_LITERAL:
585 /* repeated literal */
586 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000587 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000588 while (ptr < end && (SRE_CODE) *ptr == chr)
589 ptr++;
590 break;
591
592 case SRE_OP_LITERAL_IGNORE:
593 /* repeated literal */
594 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000595 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000596 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
597 ptr++;
598 break;
599
600 case SRE_OP_NOT_LITERAL:
601 /* repeated non-literal */
602 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000603 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000604 while (ptr < end && (SRE_CODE) *ptr != chr)
605 ptr++;
606 break;
607
608 case SRE_OP_NOT_LITERAL_IGNORE:
609 /* repeated non-literal */
610 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000612 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
613 ptr++;
614 break;
615
616 case SRE_OP_IN:
617 /* repeated set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000618 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
619 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000620 ptr++;
621 break;
622
623 default:
624 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000625 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000626 while ((SRE_CHAR*) state->ptr < end) {
627 i = SRE_MATCH(state, pattern, level);
628 if (i < 0)
629 return i;
630 if (!i)
631 break;
632 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000633 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
634 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000635 return (SRE_CHAR*) state->ptr - ptr;
636 }
637
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000638 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000639 return ptr - (SRE_CHAR*) state->ptr;
640}
641
Fredrik Lundh33accc12000-08-27 20:59:47 +0000642#if 0 /* not used in this release */
Guido van Rossumb700df92000-03-31 14:59:30 +0000643LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000644SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
645{
646 /* check if an SRE_OP_INFO block matches at the current position.
647 returns the number of SRE_CODE objects to skip if successful, 0
648 if no match */
649
650 SRE_CHAR* end = state->end;
651 SRE_CHAR* ptr = state->ptr;
652 int i;
653
654 /* check minimal length */
655 if (pattern[3] && (end - ptr) < pattern[3])
656 return 0;
657
658 /* check known prefix */
659 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
660 /* <length> <skip> <prefix data> <overlap data> */
661 for (i = 0; i < pattern[5]; i++)
662 if ((SRE_CODE) ptr[i] != pattern[7 + i])
663 return 0;
664 return pattern[0] + 2 * pattern[6];
665 }
666 return pattern[0];
667}
Fredrik Lundh33accc12000-08-27 20:59:47 +0000668#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000669
670LOCAL(int)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000671SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
Guido van Rossumb700df92000-03-31 14:59:30 +0000672{
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000673 /* check if string matches the given pattern. returns <0 for
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674 error, 0 for failure, and 1 for success */
Guido van Rossumb700df92000-03-31 14:59:30 +0000675
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000676 SRE_CHAR* end = state->end;
677 SRE_CHAR* ptr = state->ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 int i, count;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000679 SRE_REPEAT* rp;
680 int lastmark;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000681 SRE_CODE chr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000682
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000683 SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000684
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000686
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000687#if defined(USE_STACKCHECK)
Fredrik Lundh58100642000-08-09 09:14:35 +0000688 if (level % 10 == 0 && PyOS_CheckStack())
Fredrik Lundh18c2aa22000-08-07 17:33:38 +0000689 return SRE_ERROR_RECURSION_LIMIT;
690#endif
691
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000692#if defined(USE_RECURSION_LIMIT)
693 if (level > USE_RECURSION_LIMIT)
694 return SRE_ERROR_RECURSION_LIMIT;
695#endif
696
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000697 if (pattern[0] == SRE_OP_INFO) {
698 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000699 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000700 if (pattern[3] && (end - ptr) < pattern[3]) {
701 TRACE(("reject (got %d chars, need %d)\n",
702 (end - ptr), pattern[3]));
703 return 0;
704 }
705 pattern += pattern[1] + 1;
706 }
707
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000708 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000709
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000710 switch (*pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000711
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000712 case SRE_OP_FAILURE:
713 /* immediate failure */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000714 TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000715 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000716
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000717 case SRE_OP_SUCCESS:
718 /* end of pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000719 TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000720 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000721 return 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000722
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000723 case SRE_OP_AT:
724 /* match at given position */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000725 /* <AT> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000726 TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000727 if (!SRE_AT(state, ptr, *pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000728 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000729 pattern++;
730 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000731
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000732 case SRE_OP_CATEGORY:
733 /* match at given category */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000734 /* <CATEGORY> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000735 TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000736 if (ptr >= end || !sre_category(pattern[0], ptr[0]))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000737 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000738 pattern++;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000739 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000740 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000741
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000742 case SRE_OP_LITERAL:
743 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000744 /* <LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000745 TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000747 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000748 pattern++;
749 ptr++;
750 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 case SRE_OP_NOT_LITERAL:
753 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000754 /* <NOT_LITERAL> <code> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000755 TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000756 if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000757 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000758 pattern++;
759 ptr++;
760 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000761
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000763 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000764 /* <ANY> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000765 TRACE(("|%p|%p|ANY\n", pattern, ptr));
Fredrik Lundhe1869832000-08-01 22:47:49 +0000766 if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
767 return 0;
768 ptr++;
769 break;
770
771 case SRE_OP_ANY_ALL:
772 /* match anything */
773 /* <ANY_ALL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000774 TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000775 if (ptr >= end)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000776 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000777 ptr++;
778 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000779
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 case SRE_OP_IN:
781 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000782 /* <IN> <skip> <set> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000783 TRACE(("|%p|%p|IN\n", pattern, ptr));
784 if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000785 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000786 pattern += pattern[0];
787 ptr++;
788 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000789
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000790 case SRE_OP_GROUPREF:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000791 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000792 TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000793 i = pattern[0];
794 {
795 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
796 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
797 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000798 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000799 while (p < e) {
800 if (ptr >= end || *ptr != *p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000801 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000802 p++; ptr++;
803 }
804 }
805 pattern++;
806 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000807
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000808 case SRE_OP_GROUPREF_IGNORE:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000809 /* match backreference */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000810 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000811 i = pattern[0];
812 {
813 SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
814 SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
815 if (!p || !e || e < p)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000816 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817 while (p < e) {
818 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000819 state->lower(*ptr) != state->lower(*p))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000820 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000821 p++; ptr++;
822 }
823 }
824 pattern++;
825 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000826
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000827 case SRE_OP_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000828 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000829 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000830 state->lower(*ptr) != state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000831 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000832 pattern++;
833 ptr++;
834 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000835
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000836 case SRE_OP_NOT_LITERAL_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000837 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000838 if (ptr >= end ||
Fredrik Lundhb389df32000-06-29 12:48:37 +0000839 state->lower(*ptr) == state->lower(*pattern))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000840 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000841 pattern++;
842 ptr++;
843 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000845 case SRE_OP_IN_IGNORE:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000846 TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000847 if (ptr >= end
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000848 || !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000849 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000850 pattern += pattern[0];
851 ptr++;
852 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000853
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000854 case SRE_OP_MARK:
855 /* set mark */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000856 /* <MARK> <gid> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000857 TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000858 i = pattern[0];
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000859 if (i > state->lastmark) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000860 state->lastmark = i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000861 if (i & 1)
862 state->lastindex = i/2 + 1;
863 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000864 state->mark[i] = ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000865 pattern++;
866 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000867
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000868 case SRE_OP_JUMP:
869 case SRE_OP_INFO:
870 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000871 /* <JUMP> <offset> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000872 TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000873 pattern += pattern[0];
874 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000875
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000876 case SRE_OP_ASSERT:
877 /* assert subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000878 /* <ASSERT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000879 TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000880 state->ptr = ptr - pattern[1];
Fredrik Lundh6f013982000-07-03 18:44:21 +0000881 if (state->ptr < state->beginning)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000882 return 0;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000883 i = SRE_MATCH(state, pattern + 2, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000884 if (i <= 0)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000885 return i;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000886 pattern += pattern[0];
887 break;
Fredrik Lundh43b3b492000-06-30 10:41:31 +0000888
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000889 case SRE_OP_ASSERT_NOT:
890 /* assert not subpattern */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000891 /* <ASSERT_NOT> <skip> <back> <pattern> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000892 TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000893 state->ptr = ptr - pattern[1];
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000894 if (state->ptr >= state->beginning) {
895 i = SRE_MATCH(state, pattern + 2, level + 1);
896 if (i < 0)
897 return i;
898 if (i)
899 return 0;
900 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000901 pattern += pattern[0];
902 break;
903
904 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000905 /* alternation */
906 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000907 TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000908 lastmark = state->lastmark;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000909 for (; pattern[0]; pattern += pattern[0]) {
910 if (pattern[1] == SRE_OP_LITERAL &&
911 (ptr >= end || (SRE_CODE) *ptr != pattern[2]))
912 continue;
913 if (pattern[1] == SRE_OP_IN &&
914 (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
915 continue;
916 state->ptr = ptr;
917 i = SRE_MATCH(state, pattern + 1, level + 1);
918 if (i)
919 return i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000920 lastmark_restore(state, lastmark);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000921 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000922 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000923
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000924 case SRE_OP_REPEAT_ONE:
925 /* match repeated sequence (maximizing regexp) */
926
927 /* this operator only works if the repeated item is
928 exactly one character wide, and we're not already
929 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000930 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000931
932 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
933
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000934 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000935 pattern[1], pattern[2]));
936
Fredrik Lundhe1869832000-08-01 22:47:49 +0000937 if (ptr + pattern[1] > end)
938 return 0; /* cannot match */
939
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000940 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000941
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000942 count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
943 if (count < 0)
944 return count;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000945
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000946 ptr += count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000947
948 /* when we arrive here, count contains the number of
949 matches, and ptr points to the tail of the target
950 string. check if the rest of the pattern matches,
951 and backtrack if not. */
952
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000953 if (count < (int) pattern[1])
954 return 0;
955
956 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
957 /* tail is empty. we're finished */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000958 state->ptr = ptr;
959 return 1;
960
961 } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
962 /* tail starts with a literal. skip positions where
963 the rest of the pattern cannot possibly match */
Fredrik Lundhe1869832000-08-01 22:47:49 +0000964 chr = pattern[pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000965 for (;;) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000966 while (count >= (int) pattern[1] &&
967 (ptr >= end || *ptr != chr)) {
968 ptr--;
969 count--;
970 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000971 if (count < (int) pattern[1])
972 break;
973 state->ptr = ptr;
974 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000975 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000976 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000977 ptr--;
978 count--;
979 }
980
981 } else {
982 /* general case */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000983 lastmark = state->lastmark;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000984 while (count >= (int) pattern[1]) {
985 state->ptr = ptr;
986 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000987 if (i)
Fredrik Lundh33accc12000-08-27 20:59:47 +0000988 return i;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000989 ptr--;
990 count--;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +0000991 lastmark_restore(state, lastmark);
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000992 }
993 }
994 return 0;
995
Guido van Rossum41c99e72003-04-14 17:59:34 +0000996 case SRE_OP_MIN_REPEAT_ONE:
997 /* match repeated sequence (minimizing regexp) */
998
999 /* this operator only works if the repeated item is
1000 exactly one character wide, and we're not already
1001 collecting backtracking points. for other cases,
1002 use the MIN_REPEAT operator */
1003
1004 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1005
1006 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
1007 pattern[1], pattern[2]));
1008
1009 if (ptr + pattern[1] > end)
1010 return 0; /* cannot match */
1011
1012 state->ptr = ptr;
1013
1014 if (pattern[1] == 0)
1015 count = 0;
1016 else {
1017 /* count using pattern min as the maximum */
1018 count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1);
1019
1020 if (count < 0)
1021 return count; /* exception */
1022 if (count < (int) pattern[1])
1023 return 0; /* did not match minimum number of times */
1024 ptr += count; /* advance past minimum matches of repeat */
1025 }
1026
1027 if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
1028 /* tail is empty. we're finished */
1029 state->ptr = ptr;
1030 return 1;
1031
1032 } else {
1033 /* general case */
1034 int matchmax = ((int)pattern[2] == 65535);
1035 int c;
1036 lastmark = state->lastmark;
1037 while (matchmax || count <= (int) pattern[2]) {
1038 state->ptr = ptr;
1039 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
1040 if (i)
1041 return i;
1042 state->ptr = ptr;
1043 c = SRE_COUNT(state, pattern+3, 1, level+1);
1044 if (c < 0)
1045 return c;
1046 if (c == 0)
1047 break;
1048 assert(c == 1);
1049 ptr++;
1050 count++;
1051 }
1052 lastmark_restore(state, lastmark);
1053 }
1054 return 0;
1055
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001056 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001057 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001058 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001059 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001060 TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001061 pattern[1], pattern[2]));
1062
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001063 rep.count = -1;
1064 rep.pattern = pattern;
1065
1066 /* install new repeat context */
1067 rep.prev = state->repeat;
1068 state->repeat = &rep;
1069
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001070 state->ptr = ptr;
1071 i = SRE_MATCH(state, pattern + pattern[0], level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001072
1073 state->repeat = rep.prev;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001074
1075 return i;
1076
1077 case SRE_OP_MAX_UNTIL:
1078 /* maximizing repeat */
1079 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1080
1081 /* FIXME: we probably need to deal with zero-width
1082 matches in here... */
1083
1084 rp = state->repeat;
1085 if (!rp)
1086 return SRE_ERROR_STATE;
1087
1088 state->ptr = ptr;
1089
1090 count = rp->count + 1;
1091
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001092 TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001093
1094 if (count < rp->pattern[1]) {
1095 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001096 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001097 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001098 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001099 if (i)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001100 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001101 rp->count = count - 1;
1102 state->ptr = ptr;
1103 return 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001104 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001105
1106 if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001107 /* we may have enough matches, but if we can
1108 match another item, do so */
1109 rp->count = count;
1110 lastmark = state->lastmark;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001111 i = mark_save(state, 0, lastmark);
1112 if (i < 0)
1113 return i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001114 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001115 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001116 if (i)
1117 return i;
Fredrik Lundh33accc12000-08-27 20:59:47 +00001118 i = mark_restore(state, 0, lastmark);
1119 if (i < 0)
1120 return i;
Gustavo Niemeyer4e7be062002-11-06 14:06:53 +00001121 lastmark_restore(state, lastmark);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001122 rp->count = count - 1;
1123 state->ptr = ptr;
1124 }
1125
1126 /* cannot match more repeated items here. make sure the
1127 tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001128 state->repeat = rp->prev;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001129 i = SRE_MATCH(state, pattern, level + 1);
1130 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001131 return i;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001132 state->repeat = rp;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001133 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001134 return 0;
1135
1136 case SRE_OP_MIN_UNTIL:
1137 /* minimizing repeat */
1138 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1139
1140 rp = state->repeat;
1141 if (!rp)
1142 return SRE_ERROR_STATE;
1143
1144 count = rp->count + 1;
1145
Fredrik Lundh770617b2001-01-14 15:06:11 +00001146 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
1147 rp->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001148
1149 state->ptr = ptr;
1150
1151 if (count < rp->pattern[1]) {
1152 /* not enough matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001153 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001154 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001155 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001156 if (i)
1157 return i;
1158 rp->count = count-1;
1159 state->ptr = ptr;
1160 return 0;
1161 }
1162
1163 /* see if the tail matches */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001164 state->repeat = rp->prev;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001165 i = SRE_MATCH(state, pattern, level + 1);
1166 if (i)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001167 return i;
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001168
Fredrik Lundh770617b2001-01-14 15:06:11 +00001169 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001170 state->repeat = rp;
1171
1172 if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
1173 return 0;
1174
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001175 rp->count = count;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001176 /* RECURSIVE */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001177 i = SRE_MATCH(state, rp->pattern + 3, level + 1);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001178 if (i)
1179 return i;
1180 rp->count = count - 1;
Fredrik Lundh770617b2001-01-14 15:06:11 +00001181 state->ptr = ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001182 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001183
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001184 default:
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001185 TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001186 return SRE_ERROR_ILLEGAL;
1187 }
1188 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001189
Sjoerd Mullender89dfe9e2001-08-30 14:37:07 +00001190 /* can't end up here */
Fredrik Lundh21009b92001-09-18 18:47:09 +00001191 /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
Guido van Rossumb700df92000-03-31 14:59:30 +00001192}
1193
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001194LOCAL(int)
Guido van Rossumb700df92000-03-31 14:59:30 +00001195SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1196{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001197 SRE_CHAR* ptr = state->start;
1198 SRE_CHAR* end = state->end;
1199 int status = 0;
Fredrik Lundh28552902000-07-05 21:14:16 +00001200 int prefix_len = 0;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001201 int prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001202 SRE_CODE* prefix = NULL;
1203 SRE_CODE* charset = NULL;
1204 SRE_CODE* overlap = NULL;
1205 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001206
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001207 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001208 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001209 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001210
1211 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001212
1213 if (pattern[3] > 0) {
1214 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001215 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001216 end -= pattern[3]-1;
1217 if (end <= ptr)
1218 end = ptr+1;
1219 }
1220
Fredrik Lundh3562f112000-07-02 12:00:07 +00001221 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001222 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001223 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001224 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001225 prefix_skip = pattern[6];
1226 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001227 overlap = prefix + prefix_len - 1;
1228 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001229 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001230 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001231 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001232
1233 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001234 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001235
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001236 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1237 TRACE(("charset = %p\n", charset));
1238
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001239#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001240 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001241 /* pattern starts with a known prefix. use the overlap
1242 table to skip forward as fast as we possibly can */
1243 int i = 0;
1244 end = state->end;
1245 while (ptr < end) {
1246 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001247 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001248 if (!i)
1249 break;
1250 else
1251 i = overlap[i];
1252 } else {
1253 if (++i == prefix_len) {
1254 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001255 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1256 state->start = ptr + 1 - prefix_len;
1257 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001258 if (flags & SRE_INFO_LITERAL)
1259 return 1; /* we got all of it */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001260 status = SRE_MATCH(state, pattern + 2*prefix_skip, 1);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001261 if (status != 0)
1262 return status;
1263 /* close but no cigar -- try again */
1264 i = overlap[i];
1265 }
1266 break;
1267 }
1268
1269 }
1270 ptr++;
1271 }
1272 return 0;
1273 }
1274#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001275
Fredrik Lundh3562f112000-07-02 12:00:07 +00001276 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001277 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001278 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001279 SRE_CODE chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001280 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001281 for (;;) {
1282 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1283 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001284 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001285 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001286 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001287 state->start = ptr;
1288 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001289 if (flags & SRE_INFO_LITERAL)
1290 return 1; /* we got all of it */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001291 status = SRE_MATCH(state, pattern + 2, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001292 if (status != 0)
1293 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001294 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001295 } else if (charset) {
1296 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001297 end = state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001298 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001299 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001300 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001301 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001302 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001303 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001304 state->start = ptr;
1305 state->ptr = ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001306 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001307 if (status != 0)
1308 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001309 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001310 }
1311 } else
1312 /* general case */
1313 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001314 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001315 state->start = state->ptr = ptr++;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001316 status = SRE_MATCH(state, pattern, 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001317 if (status != 0)
1318 break;
1319 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001320
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001321 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001322}
Fredrik Lundh3562f112000-07-02 12:00:07 +00001323
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001324LOCAL(int)
1325SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
1326{
1327 /* check if given string is a literal template (i.e. no escapes) */
1328 while (len-- > 0)
1329 if (*ptr++ == '\\')
1330 return 0;
1331 return 1;
1332}
Guido van Rossumb700df92000-03-31 14:59:30 +00001333
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001334#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001335
1336/* -------------------------------------------------------------------- */
1337/* factories and destructors */
1338
1339/* see sre.h for object declarations */
1340
/* forward declarations of the type objects used by the factories in
   this section (_compile allocates its result with Pattern_Type) */
static PyTypeObject Pattern_Type;
static PyTypeObject Match_Type;
static PyTypeObject Scanner_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00001344
1345static PyObject *
1346_compile(PyObject* self_, PyObject* args)
1347{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001348 /* "compile" pattern descriptor to pattern object */
Guido van Rossumb700df92000-03-31 14:59:30 +00001349
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001350 PatternObject* self;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001351 int i, n;
Guido van Rossumb700df92000-03-31 14:59:30 +00001352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001353 PyObject* pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001354 int flags = 0;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001355 PyObject* code;
1356 int groups = 0;
1357 PyObject* groupindex = NULL;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001358 PyObject* indexgroup = NULL;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001359 if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
1360 &PyList_Type, &code, &groups,
1361 &groupindex, &indexgroup))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001362 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001363
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001364 n = PyList_GET_SIZE(code);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001365
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001366 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001367 if (!self)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001368 return NULL;
Fredrik Lundh6f013982000-07-03 18:44:21 +00001369
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001370 self->codesize = n;
1371
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001372 for (i = 0; i < n; i++) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001373 PyObject *o = PyList_GET_ITEM(code, i);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001374 self->code[i] = (SRE_CODE) PyInt_AsLong(o);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001375 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001376
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001377 if (PyErr_Occurred()) {
1378 PyObject_DEL(self);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001379 return NULL;
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001380 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001381
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001382 Py_INCREF(pattern);
1383 self->pattern = pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00001384
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001385 self->flags = flags;
1386
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001387 self->groups = groups;
Guido van Rossumb700df92000-03-31 14:59:30 +00001388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001389 Py_XINCREF(groupindex);
1390 self->groupindex = groupindex;
Guido van Rossumb700df92000-03-31 14:59:30 +00001391
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001392 Py_XINCREF(indexgroup);
1393 self->indexgroup = indexgroup;
Fredrik Lundhc2301732000-07-02 22:25:39 +00001394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001395 return (PyObject*) self;
Guido van Rossumb700df92000-03-31 14:59:30 +00001396}
1397
1398static PyObject *
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001399sre_codesize(PyObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001400{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001401 return Py_BuildValue("i", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001402}
1403
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001404static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001405sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001406{
1407 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001408 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001409 return NULL;
1410 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001411 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001412 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001413#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001414 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001415#else
1416 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001417#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001418 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001419}
1420
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001421LOCAL(void)
1422state_reset(SRE_STATE* state)
1423{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001424 state->lastmark = 0;
1425
1426 /* FIXME: dynamic! */
Neal Norwitz35fc7602002-06-13 21:11:11 +00001427 memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001428
1429 state->lastindex = -1;
1430
1431 state->repeat = NULL;
1432
1433 mark_fini(state);
1434}
1435
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001436static void*
1437getstring(PyObject* string, int* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001438{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001439 /* given a python object, return a data pointer, a length (in
1440 characters), and a character size. return NULL if the object
1441 is not a string (or not compatible) */
1442
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001443 PyBufferProcs *buffer;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001444 int size, bytes, charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001445 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001446
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001447#if defined(HAVE_UNICODE)
1448 if (PyUnicode_Check(string)) {
1449 /* unicode strings doesn't always support the buffer interface */
1450 ptr = (void*) PyUnicode_AS_DATA(string);
1451 bytes = PyUnicode_GET_DATA_SIZE(string);
1452 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001453 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001454
1455 } else {
1456#endif
1457
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001458 /* get pointer to string buffer */
1459 buffer = string->ob_type->tp_as_buffer;
1460 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1461 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001462 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001463 return NULL;
1464 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001465
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001466 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001467 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1468 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001469 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1470 return NULL;
1471 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001473 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001474#if PY_VERSION_HEX >= 0x01060000
1475 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001476#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001477 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001478#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001479
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001480 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001481 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001482#if defined(HAVE_UNICODE)
1483 else if (bytes == (int) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001484 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001485#endif
1486 else {
1487 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1488 return NULL;
1489 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001490
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001491#if defined(HAVE_UNICODE)
1492 }
1493#endif
1494
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001495 *p_length = size;
1496 *p_charsize = charsize;
1497
1498 return ptr;
1499}
1500
1501LOCAL(PyObject*)
1502state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1503 int start, int end)
1504{
1505 /* prepare state object */
1506
1507 int length;
1508 int charsize;
1509 void* ptr;
1510
1511 memset(state, 0, sizeof(SRE_STATE));
1512
1513 state->lastindex = -1;
1514
1515 ptr = getstring(string, &length, &charsize);
1516 if (!ptr)
1517 return NULL;
1518
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001519 /* adjust boundaries */
1520 if (start < 0)
1521 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001522 else if (start > length)
1523 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001524
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001525 if (end < 0)
1526 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001527 else if (end > length)
1528 end = length;
1529
1530 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001531
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001532 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001533
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001534 state->start = (void*) ((char*) ptr + start * state->charsize);
1535 state->end = (void*) ((char*) ptr + end * state->charsize);
1536
1537 Py_INCREF(string);
1538 state->string = string;
1539 state->pos = start;
1540 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001541
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001542 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001543 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001544 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001545#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001546 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001547#else
1548 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001549#endif
1550 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001551 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001552
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001553 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001554}
1555
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001556LOCAL(void)
1557state_fini(SRE_STATE* state)
1558{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001559 Py_XDECREF(state->string);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001560 mark_fini(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001561}
1562
/* calculate offset from start of string: the distance in bytes from
   the beginning of the buffer, divided by the character size */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1566
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001567LOCAL(PyObject*)
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001568state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001569{
Fredrik Lundh58100642000-08-09 09:14:35 +00001570 int i, j;
1571
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001572 index = (index - 1) * 2;
1573
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001574 if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001575 if (empty)
1576 /* want empty string */
1577 i = j = 0;
1578 else {
1579 Py_INCREF(Py_None);
1580 return Py_None;
1581 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001582 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001583 i = STATE_OFFSET(state, state->mark[index]);
1584 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001585 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001586
Fredrik Lundh58100642000-08-09 09:14:35 +00001587 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001588}
1589
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001590static void
1591pattern_error(int status)
1592{
1593 switch (status) {
1594 case SRE_ERROR_RECURSION_LIMIT:
1595 PyErr_SetString(
1596 PyExc_RuntimeError,
1597 "maximum recursion limit exceeded"
1598 );
1599 break;
1600 case SRE_ERROR_MEMORY:
1601 PyErr_NoMemory();
1602 break;
1603 default:
1604 /* other error codes indicate compiler/engine bugs */
1605 PyErr_SetString(
1606 PyExc_RuntimeError,
1607 "internal error in regular expression engine"
1608 );
1609 }
1610}
1611
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001612static PyObject*
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001613pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001614{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001615 /* create match object (from state object) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001616
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001617 MatchObject* match;
1618 int i, j;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001619 char* base;
1620 int n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001621
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001622 if (status > 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001623
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001624 /* create match object (with room for extra group marks) */
1625 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
Fredrik Lundh6f013982000-07-03 18:44:21 +00001626 2*(pattern->groups+1));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001627 if (!match)
1628 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001629
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001630 Py_INCREF(pattern);
1631 match->pattern = pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001632
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001633 Py_INCREF(state->string);
1634 match->string = state->string;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001635
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001636 match->regs = NULL;
1637 match->groups = pattern->groups+1;
1638
1639 /* fill in group slices */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001640
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001641 base = (char*) state->beginning;
1642 n = state->charsize;
1643
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001644 match->mark[0] = ((char*) state->start - base) / n;
1645 match->mark[1] = ((char*) state->ptr - base) / n;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001646
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001647 for (i = j = 0; i < pattern->groups; i++, j+=2)
1648 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
1649 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
1650 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
1651 } else
1652 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
1653
1654 match->pos = state->pos;
1655 match->endpos = state->endpos;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001656
Fredrik Lundh6f013982000-07-03 18:44:21 +00001657 match->lastindex = state->lastindex;
1658
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001659 return (PyObject*) match;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001660
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001661 } else if (status == 0) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001662
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001663 /* no match */
1664 Py_INCREF(Py_None);
1665 return Py_None;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001666
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001667 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001668
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001669 /* internal error */
1670 pattern_error(status);
1671 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001672}
1673
1674static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001675pattern_scanner(PatternObject* pattern, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001676{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001677 /* create search state object */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001678
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001679 ScannerObject* self;
1680
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001681 PyObject* string;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001682 int start = 0;
1683 int end = INT_MAX;
1684 if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
1685 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001686
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001687 /* create scanner object */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00001688 self = PyObject_NEW(ScannerObject, &Scanner_Type);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001689 if (!self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001690 return NULL;
1691
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001692 string = state_init(&self->state, pattern, string, start, end);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001693 if (!string) {
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001694 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001695 return NULL;
1696 }
1697
1698 Py_INCREF(pattern);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001699 self->pattern = (PyObject*) pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001700
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001701 return (PyObject*) self;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001702}
1703
Guido van Rossumb700df92000-03-31 14:59:30 +00001704static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001705pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001706{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001707 Py_XDECREF(self->pattern);
1708 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001709 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001710 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001711}
1712
1713static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001714pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001715{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001716 SRE_STATE state;
1717 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001718
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001719 PyObject* string;
1720 int start = 0;
1721 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001722 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1723 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:match", kwlist,
1724 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001725 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001726
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001727 string = state_init(&state, self, string, start, end);
1728 if (!string)
1729 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001730
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001731 state.ptr = state.start;
1732
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001733 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1734
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001735 if (state.charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001736 status = sre_match(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001737 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001738#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001739 status = sre_umatch(&state, PatternObject_GetCode(self), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001740#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001741 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001742
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001743 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1744
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001745 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001746
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001747 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001748}
1749
1750static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001751pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001752{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001753 SRE_STATE state;
1754 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001755
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001756 PyObject* string;
1757 int start = 0;
1758 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001759 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
1760 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:search", kwlist,
1761 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001762 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001763
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001764 string = state_init(&state, self, string, start, end);
1765 if (!string)
1766 return NULL;
1767
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001768 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1769
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001770 if (state.charsize == 1) {
1771 status = sre_search(&state, PatternObject_GetCode(self));
1772 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001773#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001774 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001775#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001776 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001777
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001778 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1779
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001780 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001781
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001782 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001783}
1784
1785static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001786call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001787{
1788 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001789 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001790 PyObject* func;
1791 PyObject* result;
1792
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001793 if (!args)
1794 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001795 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001796 if (!name)
1797 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001798 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001799 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001800 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001801 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001802 func = PyObject_GetAttrString(mod, function);
1803 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001804 if (!func)
1805 return NULL;
1806 result = PyObject_CallObject(func, args);
1807 Py_DECREF(func);
1808 Py_DECREF(args);
1809 return result;
1810}
1811
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001812#ifdef USE_BUILTIN_COPY
1813static int
1814deepcopy(PyObject** object, PyObject* memo)
1815{
1816 PyObject* copy;
1817
1818 copy = call(
1819 "copy", "deepcopy",
1820 Py_BuildValue("OO", *object, memo)
1821 );
1822 if (!copy)
1823 return 0;
1824
1825 Py_DECREF(*object);
1826 *object = copy;
1827
1828 return 1; /* success */
1829}
1830#endif
1831
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001832static PyObject*
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00001833join_list(PyObject* list, PyObject* pattern)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001834{
1835 /* join list elements */
1836
1837 PyObject* joiner;
1838#if PY_VERSION_HEX >= 0x01060000
1839 PyObject* function;
1840 PyObject* args;
1841#endif
1842 PyObject* result;
1843
1844 switch (PyList_GET_SIZE(list)) {
1845 case 0:
1846 Py_DECREF(list);
Fredrik Lundh09705f02002-11-22 12:46:35 +00001847 return PySequence_GetSlice(pattern, 0, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001848 case 1:
1849 result = PyList_GET_ITEM(list, 0);
1850 Py_INCREF(result);
1851 Py_DECREF(list);
1852 return result;
1853 }
1854
1855 /* two or more elements: slice out a suitable separator from the
1856 first member, and use that to join the entire list */
1857
1858 joiner = PySequence_GetSlice(pattern, 0, 0);
1859 if (!joiner)
1860 return NULL;
1861
1862#if PY_VERSION_HEX >= 0x01060000
1863 function = PyObject_GetAttrString(joiner, "join");
1864 if (!function) {
1865 Py_DECREF(joiner);
1866 return NULL;
1867 }
1868 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001869 if (!args) {
1870 Py_DECREF(function);
1871 Py_DECREF(joiner);
1872 return NULL;
1873 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001874 PyTuple_SET_ITEM(args, 0, list);
1875 result = PyObject_CallObject(function, args);
1876 Py_DECREF(args); /* also removes list */
1877 Py_DECREF(function);
1878#else
1879 result = call(
1880 "string", "join",
1881 Py_BuildValue("OO", list, joiner)
1882 );
1883#endif
1884 Py_DECREF(joiner);
1885
1886 return result;
1887}
1888
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001889static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001890pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001891{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001892 SRE_STATE state;
1893 PyObject* list;
1894 int status;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001895 int i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00001896
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001897 PyObject* string;
1898 int start = 0;
1899 int end = INT_MAX;
Fredrik Lundh562586e2000-10-03 20:43:34 +00001900 static char* kwlist[] = { "source", "pos", "endpos", NULL };
1901 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|ii:findall", kwlist,
1902 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001903 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001904
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001905 string = state_init(&state, self, string, start, end);
1906 if (!string)
1907 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001908
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001909 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001910 if (!list) {
1911 state_fini(&state);
1912 return NULL;
1913 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001914
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001915 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00001916
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001917 PyObject* item;
1918
Fredrik Lundhebc37b22000-10-28 19:30:41 +00001919 state_reset(&state);
1920
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001921 state.ptr = state.start;
1922
1923 if (state.charsize == 1) {
1924 status = sre_search(&state, PatternObject_GetCode(self));
1925 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001926#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001927 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001928#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001929 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001930
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001931 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001932 if (status == 0)
1933 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001934 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001935 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001936 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001937
1938 /* don't bother to build a match object */
1939 switch (self->groups) {
1940 case 0:
1941 b = STATE_OFFSET(&state, state.start);
1942 e = STATE_OFFSET(&state, state.ptr);
1943 item = PySequence_GetSlice(string, b, e);
1944 if (!item)
1945 goto error;
1946 break;
1947 case 1:
1948 item = state_getslice(&state, 1, string, 1);
1949 if (!item)
1950 goto error;
1951 break;
1952 default:
1953 item = PyTuple_New(self->groups);
1954 if (!item)
1955 goto error;
1956 for (i = 0; i < self->groups; i++) {
1957 PyObject* o = state_getslice(&state, i+1, string, 1);
1958 if (!o) {
1959 Py_DECREF(item);
1960 goto error;
1961 }
1962 PyTuple_SET_ITEM(item, i, o);
1963 }
1964 break;
1965 }
1966
1967 status = PyList_Append(list, item);
1968 Py_DECREF(item);
1969 if (status < 0)
1970 goto error;
1971
1972 if (state.ptr == state.start)
1973 state.start = (void*) ((char*) state.ptr + state.charsize);
1974 else
1975 state.start = state.ptr;
1976
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001977 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001978
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001979 state_fini(&state);
1980 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00001981
1982error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001983 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001984 state_fini(&state);
1985 return NULL;
1986
Guido van Rossumb700df92000-03-31 14:59:30 +00001987}
1988
Fredrik Lundh703ce812001-10-24 22:16:30 +00001989#if PY_VERSION_HEX >= 0x02020000
1990static PyObject*
1991pattern_finditer(PatternObject* pattern, PyObject* args)
1992{
1993 PyObject* scanner;
1994 PyObject* search;
1995 PyObject* iterator;
1996
1997 scanner = pattern_scanner(pattern, args);
1998 if (!scanner)
1999 return NULL;
2000
2001 search = PyObject_GetAttrString(scanner, "search");
2002 Py_DECREF(scanner);
2003 if (!search)
2004 return NULL;
2005
2006 iterator = PyCallIter_New(search, Py_None);
2007 Py_DECREF(search);
2008
2009 return iterator;
2010}
2011#endif
2012
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002013static PyObject*
2014pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2015{
2016 SRE_STATE state;
2017 PyObject* list;
2018 PyObject* item;
2019 int status;
2020 int n;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002021 int i;
2022 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002023
2024 PyObject* string;
2025 int maxsplit = 0;
2026 static char* kwlist[] = { "source", "maxsplit", NULL };
2027 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|i:split", kwlist,
2028 &string, &maxsplit))
2029 return NULL;
2030
2031 string = state_init(&state, self, string, 0, INT_MAX);
2032 if (!string)
2033 return NULL;
2034
2035 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002036 if (!list) {
2037 state_fini(&state);
2038 return NULL;
2039 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002040
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002041 n = 0;
2042 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002043
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002044 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002045
2046 state_reset(&state);
2047
2048 state.ptr = state.start;
2049
2050 if (state.charsize == 1) {
2051 status = sre_search(&state, PatternObject_GetCode(self));
2052 } else {
2053#if defined(HAVE_UNICODE)
2054 status = sre_usearch(&state, PatternObject_GetCode(self));
2055#endif
2056 }
2057
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002058 if (status <= 0) {
2059 if (status == 0)
2060 break;
2061 pattern_error(status);
2062 goto error;
2063 }
2064
2065 if (state.start == state.ptr) {
2066 if (last == state.end)
2067 break;
2068 /* skip one character */
2069 state.start = (void*) ((char*) state.ptr + state.charsize);
2070 continue;
2071 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002072
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002073 /* get segment before this match */
2074 item = PySequence_GetSlice(
2075 string, STATE_OFFSET(&state, last),
2076 STATE_OFFSET(&state, state.start)
2077 );
2078 if (!item)
2079 goto error;
2080 status = PyList_Append(list, item);
2081 Py_DECREF(item);
2082 if (status < 0)
2083 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002084
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002085 /* add groups (if any) */
2086 for (i = 0; i < self->groups; i++) {
2087 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002088 if (!item)
2089 goto error;
2090 status = PyList_Append(list, item);
2091 Py_DECREF(item);
2092 if (status < 0)
2093 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002094 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002095
2096 n = n + 1;
2097
2098 last = state.start = state.ptr;
2099
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002100 }
2101
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002102 /* get segment following last match (even if empty) */
2103 item = PySequence_GetSlice(
2104 string, STATE_OFFSET(&state, last), state.endpos
2105 );
2106 if (!item)
2107 goto error;
2108 status = PyList_Append(list, item);
2109 Py_DECREF(item);
2110 if (status < 0)
2111 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002112
2113 state_fini(&state);
2114 return list;
2115
2116error:
2117 Py_DECREF(list);
2118 state_fini(&state);
2119 return NULL;
2120
2121}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002122
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002123static PyObject*
2124pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
2125 int count, int subn)
2126{
2127 SRE_STATE state;
2128 PyObject* list;
2129 PyObject* item;
2130 PyObject* filter;
2131 PyObject* args;
2132 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002133 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002134 int status;
2135 int n;
2136 int i, b, e;
2137 int filter_is_callable;
2138
Fredrik Lundhdac58492001-10-21 21:48:30 +00002139 if (PyCallable_Check(template)) {
2140 /* sub/subn takes either a function or a template */
2141 filter = template;
2142 Py_INCREF(filter);
2143 filter_is_callable = 1;
2144 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002145 /* if not callable, check if it's a literal string */
2146 int literal;
2147 ptr = getstring(template, &n, &b);
2148 if (ptr) {
2149 if (b == 1) {
2150 literal = sre_literal_template(ptr, n);
2151 } else {
2152#if defined(HAVE_UNICODE)
2153 literal = sre_uliteral_template(ptr, n);
2154#endif
2155 }
2156 } else {
2157 PyErr_Clear();
2158 literal = 0;
2159 }
2160 if (literal) {
2161 filter = template;
2162 Py_INCREF(filter);
2163 filter_is_callable = 0;
2164 } else {
2165 /* not a literal; hand it over to the template compiler */
2166 filter = call(
2167 SRE_MODULE, "_subx",
2168 Py_BuildValue("OO", self, template)
2169 );
2170 if (!filter)
2171 return NULL;
2172 filter_is_callable = PyCallable_Check(filter);
2173 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002174 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002175
2176 string = state_init(&state, self, string, 0, INT_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002177 if (!string) {
2178 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002179 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002180 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002181
2182 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002183 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002184 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002185 state_fini(&state);
2186 return NULL;
2187 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002188
2189 n = i = 0;
2190
2191 while (!count || n < count) {
2192
2193 state_reset(&state);
2194
2195 state.ptr = state.start;
2196
2197 if (state.charsize == 1) {
2198 status = sre_search(&state, PatternObject_GetCode(self));
2199 } else {
2200#if defined(HAVE_UNICODE)
2201 status = sre_usearch(&state, PatternObject_GetCode(self));
2202#endif
2203 }
2204
2205 if (status <= 0) {
2206 if (status == 0)
2207 break;
2208 pattern_error(status);
2209 goto error;
2210 }
2211
2212 b = STATE_OFFSET(&state, state.start);
2213 e = STATE_OFFSET(&state, state.ptr);
2214
2215 if (i < b) {
2216 /* get segment before this match */
2217 item = PySequence_GetSlice(string, i, b);
2218 if (!item)
2219 goto error;
2220 status = PyList_Append(list, item);
2221 Py_DECREF(item);
2222 if (status < 0)
2223 goto error;
2224
2225 } else if (i == b && i == e && n > 0)
2226 /* ignore empty match on latest position */
2227 goto next;
2228
2229 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002230 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002231 match = pattern_new_match(self, &state, 1);
2232 if (!match)
2233 goto error;
2234 args = Py_BuildValue("(O)", match);
2235 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002236 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002237 goto error;
2238 }
2239 item = PyObject_CallObject(filter, args);
2240 Py_DECREF(args);
2241 Py_DECREF(match);
2242 if (!item)
2243 goto error;
2244 } else {
2245 /* filter is literal string */
2246 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002247 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002248 }
2249
2250 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002251 if (item != Py_None) {
2252 status = PyList_Append(list, item);
2253 Py_DECREF(item);
2254 if (status < 0)
2255 goto error;
2256 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002257
2258 i = e;
2259 n = n + 1;
2260
2261next:
2262 /* move on */
2263 if (state.ptr == state.start)
2264 state.start = (void*) ((char*) state.ptr + state.charsize);
2265 else
2266 state.start = state.ptr;
2267
2268 }
2269
2270 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002271 if (i < state.endpos) {
2272 item = PySequence_GetSlice(string, i, state.endpos);
2273 if (!item)
2274 goto error;
2275 status = PyList_Append(list, item);
2276 Py_DECREF(item);
2277 if (status < 0)
2278 goto error;
2279 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002280
2281 state_fini(&state);
2282
Guido van Rossum4e173842001-12-07 04:25:10 +00002283 Py_DECREF(filter);
2284
Fredrik Lundhdac58492001-10-21 21:48:30 +00002285 /* convert list to single string (also removes list) */
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00002286 item = join_list(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002287
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002288 if (!item)
2289 return NULL;
2290
2291 if (subn)
2292 return Py_BuildValue("Ni", item, n);
2293
2294 return item;
2295
2296error:
2297 Py_DECREF(list);
2298 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002299 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002300 return NULL;
2301
2302}
2303
2304static PyObject*
2305pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2306{
2307 PyObject* template;
2308 PyObject* string;
2309 int count = 0;
2310 static char* kwlist[] = { "repl", "string", "count", NULL };
2311 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
2312 &template, &string, &count))
2313 return NULL;
2314
2315 return pattern_subx(self, template, string, count, 0);
2316}
2317
2318static PyObject*
2319pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2320{
2321 PyObject* template;
2322 PyObject* string;
2323 int count = 0;
2324 static char* kwlist[] = { "repl", "string", "count", NULL };
2325 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
2326 &template, &string, &count))
2327 return NULL;
2328
2329 return pattern_subx(self, template, string, count, 1);
2330}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002331
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002332static PyObject*
2333pattern_copy(PatternObject* self, PyObject* args)
2334{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002335#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002336 PatternObject* copy;
2337 int offset;
2338
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002339 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2340 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002341
2342 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2343 if (!copy)
2344 return NULL;
2345
2346 offset = offsetof(PatternObject, groups);
2347
2348 Py_XINCREF(self->groupindex);
2349 Py_XINCREF(self->indexgroup);
2350 Py_XINCREF(self->pattern);
2351
2352 memcpy((char*) copy + offset, (char*) self + offset,
2353 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2354
2355 return (PyObject*) copy;
2356#else
2357 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2358 return NULL;
2359#endif
2360}
2361
2362static PyObject*
2363pattern_deepcopy(PatternObject* self, PyObject* args)
2364{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002365#ifdef USE_BUILTIN_COPY
2366 PatternObject* copy;
2367
2368 PyObject* memo;
2369 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2370 return NULL;
2371
2372 copy = (PatternObject*) pattern_copy(self, Py_None);
2373 if (!copy)
2374 return NULL;
2375
2376 if (!deepcopy(&copy->groupindex, memo) ||
2377 !deepcopy(&copy->indexgroup, memo) ||
2378 !deepcopy(&copy->pattern, memo)) {
2379 Py_DECREF(copy);
2380 return NULL;
2381 }
2382
2383#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002384 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2385 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002386#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002387}
2388
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002389static PyMethodDef pattern_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002390 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS},
2391 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS},
2392 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS},
2393 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS},
2394 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS},
2395 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002396#if PY_VERSION_HEX >= 0x02020000
2397 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS},
2398#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002399 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002400 {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS},
2401 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002402 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002403};
2404
2405static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002406pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002407{
2408 PyObject* res;
2409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002410 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002412 if (res)
2413 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002414
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002415 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002416
2417 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002418 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002419 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002420 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002421 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002422
2423 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002424 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002425
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002426 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002427 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002429 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002430 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002431 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002432 }
2433
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002434 PyErr_SetString(PyExc_AttributeError, name);
2435 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002436}
2437
2438statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002439 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002440 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002441 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002442 (destructor)pattern_dealloc, /*tp_dealloc*/
2443 0, /*tp_print*/
2444 (getattrfunc)pattern_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002445};
2446
2447/* -------------------------------------------------------------------- */
2448/* match methods */
2449
2450static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002451match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002452{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002453 Py_XDECREF(self->regs);
2454 Py_XDECREF(self->string);
2455 Py_DECREF(self->pattern);
2456 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002457}
2458
2459static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002460match_getslice_by_index(MatchObject* self, int index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002461{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002462 if (index < 0 || index >= self->groups) {
2463 /* raise IndexError if we were given a bad group number */
2464 PyErr_SetString(
2465 PyExc_IndexError,
2466 "no such group"
2467 );
2468 return NULL;
2469 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002470
Fredrik Lundh6f013982000-07-03 18:44:21 +00002471 index *= 2;
2472
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002473 if (self->string == Py_None || self->mark[index] < 0) {
2474 /* return default value if the string or group is undefined */
2475 Py_INCREF(def);
2476 return def;
2477 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002478
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002479 return PySequence_GetSlice(
2480 self->string, self->mark[index], self->mark[index+1]
2481 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002482}
2483
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002484static int
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002485match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002486{
Fredrik Lundh6f013982000-07-03 18:44:21 +00002487 int i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002488
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002489 if (PyInt_Check(index))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002490 return (int) PyInt_AS_LONG(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002491
Fredrik Lundh6f013982000-07-03 18:44:21 +00002492 i = -1;
2493
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002494 if (self->pattern->groupindex) {
2495 index = PyObject_GetItem(self->pattern->groupindex, index);
2496 if (index) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002497 if (PyInt_Check(index))
2498 i = (int) PyInt_AS_LONG(index);
2499 Py_DECREF(index);
2500 } else
2501 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002502 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002503
2504 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002505}
2506
2507static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002508match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002509{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002510 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002511}
2512
2513static PyObject*
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002514match_expand(MatchObject* self, PyObject* args)
2515{
2516 PyObject* template;
2517 if (!PyArg_ParseTuple(args, "O:expand", &template))
2518 return NULL;
2519
2520 /* delegate to Python code */
2521 return call(
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002522 SRE_MODULE, "_expand",
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002523 Py_BuildValue("OOO", self->pattern, self, template)
2524 );
2525}
2526
2527static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002528match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002529{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002530 PyObject* result;
2531 int i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002532
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002533 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002534
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002535 switch (size) {
2536 case 0:
2537 result = match_getslice(self, Py_False, Py_None);
2538 break;
2539 case 1:
2540 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2541 break;
2542 default:
2543 /* fetch multiple items */
2544 result = PyTuple_New(size);
2545 if (!result)
2546 return NULL;
2547 for (i = 0; i < size; i++) {
2548 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002549 self, PyTuple_GET_ITEM(args, i), Py_None
2550 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002551 if (!item) {
2552 Py_DECREF(result);
2553 return NULL;
2554 }
2555 PyTuple_SET_ITEM(result, i, item);
2556 }
2557 break;
2558 }
2559 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002560}
2561
2562static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002563match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002564{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002565 PyObject* result;
2566 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002567
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002568 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002569 static char* kwlist[] = { "default", NULL };
2570 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002571 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002572
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002573 result = PyTuple_New(self->groups-1);
2574 if (!result)
2575 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002576
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002577 for (index = 1; index < self->groups; index++) {
2578 PyObject* item;
2579 item = match_getslice_by_index(self, index, def);
2580 if (!item) {
2581 Py_DECREF(result);
2582 return NULL;
2583 }
2584 PyTuple_SET_ITEM(result, index-1, item);
2585 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002586
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002587 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002588}
2589
2590static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002591match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002592{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002593 PyObject* result;
2594 PyObject* keys;
2595 int index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002596
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002597 PyObject* def = Py_None;
Fredrik Lundh562586e2000-10-03 20:43:34 +00002598 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002599 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002600 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002602 result = PyDict_New();
2603 if (!result || !self->pattern->groupindex)
2604 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002606 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002607 if (!keys)
2608 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002609
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002610 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002611 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002612 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002613 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002614 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002615 if (!key)
2616 goto failed;
2617 value = match_getslice(self, key, def);
2618 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002619 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002620 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002621 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002622 status = PyDict_SetItem(result, key, value);
2623 Py_DECREF(value);
2624 if (status < 0)
2625 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002626 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002627
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002628 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002629
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002630 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002631
2632failed:
2633 Py_DECREF(keys);
2634 Py_DECREF(result);
2635 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002636}
2637
2638static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002639match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002640{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002641 int index;
2642
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002643 PyObject* index_ = Py_False; /* zero */
2644 if (!PyArg_ParseTuple(args, "|O:start", &index_))
2645 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002646
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002647 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002648
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002649 if (index < 0 || index >= self->groups) {
2650 PyErr_SetString(
2651 PyExc_IndexError,
2652 "no such group"
2653 );
2654 return NULL;
2655 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002656
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002657 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002658 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002659}
2660
2661static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002662match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002663{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002664 int index;
2665
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002666 PyObject* index_ = Py_False; /* zero */
2667 if (!PyArg_ParseTuple(args, "|O:end", &index_))
2668 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002669
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002670 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002671
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002672 if (index < 0 || index >= self->groups) {
2673 PyErr_SetString(
2674 PyExc_IndexError,
2675 "no such group"
2676 );
2677 return NULL;
2678 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002679
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002680 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002681 return Py_BuildValue("i", self->mark[index*2+1]);
2682}
2683
2684LOCAL(PyObject*)
2685_pair(int i1, int i2)
2686{
2687 PyObject* pair;
2688 PyObject* item;
2689
2690 pair = PyTuple_New(2);
2691 if (!pair)
2692 return NULL;
2693
2694 item = PyInt_FromLong(i1);
2695 if (!item)
2696 goto error;
2697 PyTuple_SET_ITEM(pair, 0, item);
2698
2699 item = PyInt_FromLong(i2);
2700 if (!item)
2701 goto error;
2702 PyTuple_SET_ITEM(pair, 1, item);
2703
2704 return pair;
2705
2706 error:
2707 Py_DECREF(pair);
2708 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002709}
2710
2711static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002712match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002713{
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002714 int index;
2715
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002716 PyObject* index_ = Py_False; /* zero */
2717 if (!PyArg_ParseTuple(args, "|O:span", &index_))
2718 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002719
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002720 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002721
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002722 if (index < 0 || index >= self->groups) {
2723 PyErr_SetString(
2724 PyExc_IndexError,
2725 "no such group"
2726 );
2727 return NULL;
2728 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002729
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002730 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002731 return _pair(self->mark[index*2], self->mark[index*2+1]);
2732}
2733
2734static PyObject*
2735match_regs(MatchObject* self)
2736{
2737 PyObject* regs;
2738 PyObject* item;
2739 int index;
2740
2741 regs = PyTuple_New(self->groups);
2742 if (!regs)
2743 return NULL;
2744
2745 for (index = 0; index < self->groups; index++) {
2746 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2747 if (!item) {
2748 Py_DECREF(regs);
2749 return NULL;
2750 }
2751 PyTuple_SET_ITEM(regs, index, item);
2752 }
2753
2754 Py_INCREF(regs);
2755 self->regs = regs;
2756
2757 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002758}
2759
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002760static PyObject*
2761match_copy(MatchObject* self, PyObject* args)
2762{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002763#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002764 MatchObject* copy;
2765 int slots, offset;
2766
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002767 if (args != Py_None && !PyArg_ParseTuple(args, ":__copy__"))
2768 return NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002769
2770 slots = 2 * (self->pattern->groups+1);
2771
2772 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2773 if (!copy)
2774 return NULL;
2775
2776 /* this value a constant, but any compiler should be able to
2777 figure that out all by itself */
2778 offset = offsetof(MatchObject, string);
2779
2780 Py_XINCREF(self->pattern);
2781 Py_XINCREF(self->string);
2782 Py_XINCREF(self->regs);
2783
2784 memcpy((char*) copy + offset, (char*) self + offset,
2785 sizeof(MatchObject) + slots * sizeof(int) - offset);
2786
2787 return (PyObject*) copy;
2788#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002789 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002790 return NULL;
2791#endif
2792}
2793
2794static PyObject*
2795match_deepcopy(MatchObject* self, PyObject* args)
2796{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002797#ifdef USE_BUILTIN_COPY
2798 MatchObject* copy;
2799
2800 PyObject* memo;
2801 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
2802 return NULL;
2803
2804 copy = (MatchObject*) match_copy(self, Py_None);
2805 if (!copy)
2806 return NULL;
2807
2808 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2809 !deepcopy(&copy->string, memo) ||
2810 !deepcopy(&copy->regs, memo)) {
2811 Py_DECREF(copy);
2812 return NULL;
2813 }
2814
2815#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002816 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2817 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002818#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002819}
2820
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002821static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00002822 {"group", (PyCFunction) match_group, METH_VARARGS},
2823 {"start", (PyCFunction) match_start, METH_VARARGS},
2824 {"end", (PyCFunction) match_end, METH_VARARGS},
2825 {"span", (PyCFunction) match_span, METH_VARARGS},
2826 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
2827 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
2828 {"expand", (PyCFunction) match_expand, METH_VARARGS},
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002829 {"__copy__", (PyCFunction) match_copy, METH_VARARGS},
2830 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002831 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002832};
2833
2834static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002835match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002836{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002837 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002838
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002839 res = Py_FindMethod(match_methods, (PyObject*) self, name);
2840 if (res)
2841 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002842
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002843 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002845 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002846 if (self->lastindex >= 0)
2847 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00002848 Py_INCREF(Py_None);
2849 return Py_None;
2850 }
2851
2852 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00002853 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00002854 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00002855 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00002856 );
2857 if (result)
2858 return result;
2859 PyErr_Clear();
2860 }
2861 Py_INCREF(Py_None);
2862 return Py_None;
2863 }
2864
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002865 if (!strcmp(name, "string")) {
2866 if (self->string) {
2867 Py_INCREF(self->string);
2868 return self->string;
2869 } else {
2870 Py_INCREF(Py_None);
2871 return Py_None;
2872 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002873 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002874
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002875 if (!strcmp(name, "regs")) {
2876 if (self->regs) {
2877 Py_INCREF(self->regs);
2878 return self->regs;
2879 } else
2880 return match_regs(self);
2881 }
2882
2883 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002884 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002885 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002886 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002887
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002888 if (!strcmp(name, "pos"))
2889 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002890
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002891 if (!strcmp(name, "endpos"))
2892 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00002893
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002894 PyErr_SetString(PyExc_AttributeError, name);
2895 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002896}
2897
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2900
2901statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002902 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002903 0, "_" SRE_MODULE ".SRE_Match",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002904 sizeof(MatchObject), sizeof(int),
2905 (destructor)match_dealloc, /*tp_dealloc*/
2906 0, /*tp_print*/
2907 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002908};
2909
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002910/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002911/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002912
2913static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002914scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002915{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002916 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002917 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002918 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002919}
2920
2921static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002922scanner_match(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002923{
2924 SRE_STATE* state = &self->state;
2925 PyObject* match;
2926 int status;
2927
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002928 state_reset(state);
2929
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002930 state->ptr = state->start;
2931
2932 if (state->charsize == 1) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002933 status = sre_match(state, PatternObject_GetCode(self->pattern), 1);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002934 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002935#if defined(HAVE_UNICODE)
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00002936 status = sre_umatch(state, PatternObject_GetCode(self->pattern), 1);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002937#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002938 }
2939
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002940 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002941 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002942
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00002943 if ((status == 0 || state->ptr == state->start) &&
2944 state->ptr < state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002945 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002946 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002947 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002948
2949 return match;
2950}
2951
2952
2953static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002954scanner_search(ScannerObject* self, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002955{
2956 SRE_STATE* state = &self->state;
2957 PyObject* match;
2958 int status;
2959
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002960 state_reset(state);
2961
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002962 state->ptr = state->start;
2963
2964 if (state->charsize == 1) {
2965 status = sre_search(state, PatternObject_GetCode(self->pattern));
2966 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002967#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002968 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002969#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002970 }
2971
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002972 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002973 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002974
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00002975 if ((status == 0 || state->ptr == state->start) &&
2976 state->ptr < state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002977 state->start = (void*) ((char*) state->ptr + state->charsize);
2978 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002979 state->start = state->ptr;
2980
2981 return match;
2982}
2983
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002984static PyMethodDef scanner_methods[] = {
Neal Norwitzbb2769f2002-03-31 15:46:00 +00002985 /* FIXME: use METH_OLDARGS instead of 0 or fix to use METH_VARARGS */
2986 /* METH_OLDARGS is not in Python 1.5.2 */
2987 {"match", (PyCFunction) scanner_match, 0},
2988 {"search", (PyCFunction) scanner_search, 0},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002989 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002990};
2991
2992static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002993scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002994{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002995 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002996
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002997 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
2998 if (res)
2999 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003000
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003001 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003002
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003003 /* attributes */
3004 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003005 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003006 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003007 }
3008
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003009 PyErr_SetString(PyExc_AttributeError, name);
3010 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003011}
3012
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003013statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003014 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00003015 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003016 sizeof(ScannerObject), 0,
3017 (destructor)scanner_dealloc, /*tp_dealloc*/
3018 0, /*tp_print*/
3019 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003020};
3021
Guido van Rossumb700df92000-03-31 14:59:30 +00003022static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00003023 {"compile", _compile, METH_VARARGS},
3024 {"getcodesize", sre_codesize, METH_VARARGS},
3025 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003026 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003027};
3028
Mark Hammond8235ea12002-07-19 06:55:41 +00003029PyMODINIT_FUNC init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00003030{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003031 PyObject* m;
3032 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00003033 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003034
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003035 /* Patch object types */
3036 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003037 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00003038
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00003039 m = Py_InitModule("_" SRE_MODULE, _functions);
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003040 d = PyModule_GetDict(m);
3041
Fredrik Lundh21009b92001-09-18 18:47:09 +00003042 x = PyInt_FromLong(SRE_MAGIC);
3043 if (x) {
3044 PyDict_SetItemString(d, "MAGIC", x);
3045 Py_DECREF(x);
3046 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003047
Fredrik Lundh21009b92001-09-18 18:47:09 +00003048 x = PyString_FromString(copyright);
3049 if (x) {
3050 PyDict_SetItemString(d, "copyright", x);
3051 Py_DECREF(x);
3052 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003053}
3054
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003055#endif /* !defined(SRE_RECURSIVE) */