blob: d4029659515724865d20672066bc6028d162e4d0 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
36
37#ifndef SRE_RECURSIVE
38
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000039static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000040 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000041
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042#define PY_SSIZE_T_CLEAN
43
Guido van Rossumb700df92000-03-31 14:59:30 +000044#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000045#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000046
47#include "sre.h"
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
Fredrik Lundh436c3d582000-06-29 08:58:44 +000051/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000052#if !defined(SRE_MODULE)
53#define SRE_MODULE "sre"
54#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000055
Thomas Wouters9ada3d62006-04-21 09:47:09 +000056#define SRE_PY_MODULE "re"
57
Guido van Rossumb700df92000-03-31 14:59:30 +000058/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000059#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000060
Fredrik Lundh971e78b2001-10-20 17:48:46 +000061#if PY_VERSION_HEX >= 0x01060000
62#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
Fredrik Lundh22d25462000-07-01 17:50:59 +000063/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000064#define HAVE_UNICODE
65#endif
Fredrik Lundh971e78b2001-10-20 17:48:46 +000066#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000067
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000069/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000070
71/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000072#define USE_FAST_SEARCH
73
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000074/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000075#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000076
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000077/* enables copy/deepcopy handling (work in progress) */
78#undef USE_BUILTIN_COPY
79
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000080#if PY_VERSION_HEX < 0x01060000
81#define PyObject_DEL(op) PyMem_DEL((op))
82#endif
83
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000084/* -------------------------------------------------------------------- */
85
Fredrik Lundh80946112000-06-29 18:03:25 +000086#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000087#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000088#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000089/* fastest possible local call under MSVC */
90#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000092#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000093#else
94#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000095#endif
96
97/* error codes */
98#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000099#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000100#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +0000101#define SRE_ERROR_MEMORY -9 /* out of memory */
102
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000103#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000104#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000105#else
106#define TRACE(v)
107#endif
108
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000109/* -------------------------------------------------------------------- */
110/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000111
/* default character predicates (run sre_chars.py to regenerate tables) */

/* Bit flags stored in sre_char_info[]; one entry may carry several. */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* Class flags for character codes 0..127.  The table is only ever read,
   so it is declared const. */
static const char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };

/* Lower-case mapping for character codes 0..127 ('A'..'Z' map to
   'a'..'z', every other code maps to itself).  Read-only as well. */
static const char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127 };

/* Each predicate evaluates to the corresponding mask bit (non-zero) when
 * the character has that class, and to 0 otherwise.
 *
 * The range test is written as !(ch & ~127) rather than ch < 128 so that
 * a negative signed argument is rejected instead of being used to index
 * in front of the table; for every non-negative value the two tests are
 * equivalent.  Note that `ch` is evaluated more than once. */
#define SRE_IS_DIGIT(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    (!((ch) & ~127) ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000150
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000151static unsigned int sre_lower(unsigned int ch)
152{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000153 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000154}
155
/* locale-specific character predicates */
/* The range check is spelled !(c & ~N), which equals (c < N+1) for any
 * unsigned c; this form avoids compiler warnings when c's type can only
 * hold numbers < N+1.  Codes above 255 are never handed to <ctype.h>. */
#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case `ch` with the C library's tolower().  Codes of 256 and
   above are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
169
/* unicode-specific character predicates */

#if defined(HAVE_UNICODE)

/* Thin wrappers over the Py_UNICODE_* classification macros; the
   argument is cast to Py_UNICODE before it is classified. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case `ch` using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const Py_UNICODE code = (Py_UNICODE)(ch);

    return (unsigned int) Py_UNICODE_TOLOWER(code);
}

#endif
186
Guido van Rossumb700df92000-03-31 14:59:30 +0000187LOCAL(int)
188sre_category(SRE_CODE category, unsigned int ch)
189{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000190 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000191
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000192 case SRE_CATEGORY_DIGIT:
193 return SRE_IS_DIGIT(ch);
194 case SRE_CATEGORY_NOT_DIGIT:
195 return !SRE_IS_DIGIT(ch);
196 case SRE_CATEGORY_SPACE:
197 return SRE_IS_SPACE(ch);
198 case SRE_CATEGORY_NOT_SPACE:
199 return !SRE_IS_SPACE(ch);
200 case SRE_CATEGORY_WORD:
201 return SRE_IS_WORD(ch);
202 case SRE_CATEGORY_NOT_WORD:
203 return !SRE_IS_WORD(ch);
204 case SRE_CATEGORY_LINEBREAK:
205 return SRE_IS_LINEBREAK(ch);
206 case SRE_CATEGORY_NOT_LINEBREAK:
207 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000208
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000209 case SRE_CATEGORY_LOC_WORD:
210 return SRE_LOC_IS_WORD(ch);
211 case SRE_CATEGORY_LOC_NOT_WORD:
212 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000213
214#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000215 case SRE_CATEGORY_UNI_DIGIT:
216 return SRE_UNI_IS_DIGIT(ch);
217 case SRE_CATEGORY_UNI_NOT_DIGIT:
218 return !SRE_UNI_IS_DIGIT(ch);
219 case SRE_CATEGORY_UNI_SPACE:
220 return SRE_UNI_IS_SPACE(ch);
221 case SRE_CATEGORY_UNI_NOT_SPACE:
222 return !SRE_UNI_IS_SPACE(ch);
223 case SRE_CATEGORY_UNI_WORD:
224 return SRE_UNI_IS_WORD(ch);
225 case SRE_CATEGORY_UNI_NOT_WORD:
226 return !SRE_UNI_IS_WORD(ch);
227 case SRE_CATEGORY_UNI_LINEBREAK:
228 return SRE_UNI_IS_LINEBREAK(ch);
229 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
230 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000231#else
232 case SRE_CATEGORY_UNI_DIGIT:
233 return SRE_IS_DIGIT(ch);
234 case SRE_CATEGORY_UNI_NOT_DIGIT:
235 return !SRE_IS_DIGIT(ch);
236 case SRE_CATEGORY_UNI_SPACE:
237 return SRE_IS_SPACE(ch);
238 case SRE_CATEGORY_UNI_NOT_SPACE:
239 return !SRE_IS_SPACE(ch);
240 case SRE_CATEGORY_UNI_WORD:
241 return SRE_LOC_IS_WORD(ch);
242 case SRE_CATEGORY_UNI_NOT_WORD:
243 return !SRE_LOC_IS_WORD(ch);
244 case SRE_CATEGORY_UNI_LINEBREAK:
245 return SRE_IS_LINEBREAK(ch);
246 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
247 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000248#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000249 }
250 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000251}
252
253/* helpers */
254
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000255static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000256data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000257{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000258 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000259 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000260 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000261 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000262 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000263}
264
265static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000266data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000267{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000268 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000269 minsize = state->data_stack_base+size;
270 cursize = state->data_stack_size;
271 if (cursize < minsize) {
272 void* stack;
273 cursize = minsize+minsize/4+1024;
274 TRACE(("allocate/grow stack %d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000276 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000277 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000278 return SRE_ERROR_MEMORY;
279 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000280 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000281 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000282 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000283 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000284}
285
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000286/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000287
288#define SRE_CHAR unsigned char
289#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000290#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000291#define SRE_CHARSET sre_charset
292#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000293#define SRE_MATCH sre_match
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000294#define SRE_MATCH_CONTEXT sre_match_context
Guido van Rossumb700df92000-03-31 14:59:30 +0000295#define SRE_SEARCH sre_search
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000296#define SRE_LITERAL_TEMPLATE sre_literal_template
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000297
298#if defined(HAVE_UNICODE)
299
Guido van Rossumb700df92000-03-31 14:59:30 +0000300#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000301#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000302#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000303
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000304#undef SRE_LITERAL_TEMPLATE
Guido van Rossumb700df92000-03-31 14:59:30 +0000305#undef SRE_SEARCH
306#undef SRE_MATCH
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000307#undef SRE_MATCH_CONTEXT
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000308#undef SRE_INFO
309#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000310#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000311#undef SRE_AT
312#undef SRE_CHAR
313
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000314/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000315
316#define SRE_CHAR Py_UNICODE
317#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000318#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000319#define SRE_CHARSET sre_ucharset
320#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000321#define SRE_MATCH sre_umatch
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000322#define SRE_MATCH_CONTEXT sre_umatch_context
Guido van Rossumb700df92000-03-31 14:59:30 +0000323#define SRE_SEARCH sre_usearch
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000324#define SRE_LITERAL_TEMPLATE sre_uliteral_template
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000325#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000326
327#endif /* SRE_RECURSIVE */
328
329/* -------------------------------------------------------------------- */
330/* String matching engine */
331
332/* the following section is compiled twice, with different character
333 settings */
334
335LOCAL(int)
336SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
337{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000338 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000339
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000340 Py_ssize_t thisp, thatp;
Guido van Rossumb700df92000-03-31 14:59:30 +0000341
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000343
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000344 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000345 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000346 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000347
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000348 case SRE_AT_BEGINNING_LINE:
349 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000350 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000352 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000353 return (((void*) (ptr+1) == state->end &&
354 SRE_IS_LINEBREAK((int) ptr[0])) ||
355 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000356
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000357 case SRE_AT_END_LINE:
358 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000359 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000360
Fredrik Lundh770617b2001-01-14 15:06:11 +0000361 case SRE_AT_END_STRING:
362 return ((void*) ptr == state->end);
363
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000364 case SRE_AT_BOUNDARY:
365 if (state->beginning == state->end)
366 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000367 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000368 SRE_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000369 thisp = ((void*) ptr < state->end) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000370 SRE_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000371 return thisp != thatp;
Fredrik Lundh80946112000-06-29 18:03:25 +0000372
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000373 case SRE_AT_NON_BOUNDARY:
374 if (state->beginning == state->end)
375 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000376 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000377 SRE_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000378 thisp = ((void*) ptr < state->end) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000379 SRE_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000380 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000381
382 case SRE_AT_LOC_BOUNDARY:
383 if (state->beginning == state->end)
384 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000385 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000386 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000387 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000388 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000389 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000390
391 case SRE_AT_LOC_NON_BOUNDARY:
392 if (state->beginning == state->end)
393 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000394 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000395 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000396 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000397 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000398 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000399
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000400#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000401 case SRE_AT_UNI_BOUNDARY:
402 if (state->beginning == state->end)
403 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000404 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000405 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000406 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000407 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000408 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000409
410 case SRE_AT_UNI_NON_BOUNDARY:
411 if (state->beginning == state->end)
412 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000413 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000414 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000415 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000416 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000417 return thisp == thatp;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000418#endif
419
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000420 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000421
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000423}
424
425LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000426SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000427{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000428 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000432 for (;;) {
433 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000434
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000435 case SRE_OP_FAILURE:
436 return !ok;
437
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000438 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000439 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000440 if (ch == set[0])
441 return ok;
442 set++;
443 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000444
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000445 case SRE_OP_CATEGORY:
446 /* <CATEGORY> <code> */
447 if (sre_category(set[0], (int) ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000448 return ok;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000449 set += 1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000450 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000451
Fredrik Lundh3562f112000-07-02 12:00:07 +0000452 case SRE_OP_CHARSET:
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000453 if (sizeof(SRE_CODE) == 2) {
454 /* <CHARSET> <bitmap> (16 bits per code word) */
455 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
456 return ok;
457 set += 16;
Tim Peters3d563502006-01-21 02:47:53 +0000458 }
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000459 else {
460 /* <CHARSET> <bitmap> (32 bits per code word) */
461 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
462 return ok;
463 set += 8;
464 }
Fredrik Lundh3562f112000-07-02 12:00:07 +0000465 break;
466
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000467 case SRE_OP_RANGE:
468 /* <RANGE> <lower> <upper> */
469 if (set[0] <= ch && ch <= set[1])
470 return ok;
471 set += 2;
472 break;
473
474 case SRE_OP_NEGATE:
475 ok = !ok;
476 break;
477
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000478 case SRE_OP_BIGCHARSET:
479 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
480 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000481 Py_ssize_t count, block;
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000482 count = *(set++);
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000483
484 if (sizeof(SRE_CODE) == 2) {
485 block = ((unsigned char*)set)[ch >> 8];
486 set += 128;
487 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
488 return ok;
489 set += count*16;
490 }
491 else {
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000492 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
493 * warnings when c's type supports only numbers < N+1 */
494 if (!(ch & ~65535))
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000495 block = ((unsigned char*)set)[ch >> 8];
496 else
497 block = -1;
498 set += 64;
Tim Peters3d563502006-01-21 02:47:53 +0000499 if (block >=0 &&
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000500 (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
501 return ok;
502 set += count*8;
503 }
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000504 break;
505 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000506
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000507 default:
508 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000509 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000510 return 0;
511 }
512 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000513}
514
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000515LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000516
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000517LOCAL(Py_ssize_t)
518SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000519{
520 SRE_CODE chr;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000521 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
522 SRE_CHAR* end = (SRE_CHAR *)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000523 Py_ssize_t i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000524
525 /* adjust end */
526 if (maxcount < end - ptr && maxcount != 65535)
527 end = ptr + maxcount;
528
529 switch (pattern[0]) {
530
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000531 case SRE_OP_IN:
532 /* repeated set */
533 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
534 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
535 ptr++;
536 break;
537
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000538 case SRE_OP_ANY:
539 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000540 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000541 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
542 ptr++;
543 break;
544
545 case SRE_OP_ANY_ALL:
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000546 /* repeated dot wildcard. skip to the end of the target
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000547 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000548 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000549 ptr = end;
550 break;
551
552 case SRE_OP_LITERAL:
553 /* repeated literal */
554 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000555 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000556 while (ptr < end && (SRE_CODE) *ptr == chr)
557 ptr++;
558 break;
559
560 case SRE_OP_LITERAL_IGNORE:
561 /* repeated literal */
562 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000563 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000564 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
565 ptr++;
566 break;
567
568 case SRE_OP_NOT_LITERAL:
569 /* repeated non-literal */
570 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000571 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000572 while (ptr < end && (SRE_CODE) *ptr != chr)
573 ptr++;
574 break;
Tim Peters3d563502006-01-21 02:47:53 +0000575
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000576 case SRE_OP_NOT_LITERAL_IGNORE:
577 /* repeated non-literal */
578 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000579 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000580 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
581 ptr++;
582 break;
583
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000584 default:
585 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000586 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000587 while ((SRE_CHAR*) state->ptr < end) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000588 i = SRE_MATCH(state, pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000589 if (i < 0)
590 return i;
591 if (!i)
592 break;
593 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000594 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
595 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000596 return (SRE_CHAR*) state->ptr - ptr;
597 }
598
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000599 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000600 return ptr - (SRE_CHAR*) state->ptr;
601}
602
#if 0 /* not used in this release */
/* Check if an SRE_OP_INFO block matches at the current position.
 * Returns the number of SRE_CODE objects to skip if successful, 0 if
 * there is no match.
 */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    Py_ssize_t i;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* no known prefix to verify: skip just the info block */
    if (!(pattern[2] & SRE_INFO_PREFIX) || pattern[5] <= 1)
        return pattern[0];

    /* check known prefix */
    /* <length> <skip> <prefix data> <overlap data> */
    for (i = 0; i < pattern[5]; i++) {
        if ((SRE_CODE) ptr[i] != pattern[7 + i])
            return 0;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000630
/* The macros below should be used to protect recursive SRE_MATCH()
 * calls that *failed* and do *not* return immediately (IOW, those
 * that will backtrack). To explain:
 *
 * - Recursive SRE_MATCH() returned true: that's usually a success
 *   (apart from atypical cases like ASSERT_NOT), therefore there's no
 *   reason to restore lastmark;
 *
 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
 *   is returning to the caller: if the current SRE_MATCH() is the
 *   top function of the recursion, returning false will be a matching
 *   failure, and it doesn't matter where lastmark is pointing to.
 *   If it's *not* the top function, it will be a recursive SRE_MATCH()
 *   failure by itself, and the calling SRE_MATCH() will have to deal
 *   with the failure by the same rules explained here (it will restore
 *   lastmark by itself if necessary);
 *
 * - Recursive SRE_MATCH() returned false, and will continue the
 *   outside 'for' loop: must be protected when breaking, since the next
 *   OP could potentially depend on lastmark;
 *
 * - Recursive SRE_MATCH() returned false, and will be called again
 *   inside a local for/while loop: must be protected between each
 *   loop iteration, since the recursive SRE_MATCH() could do anything,
 *   and could potentially depend on lastmark.
 *
 * For more information, check the discussion at SF patch #712900.
 */
/* Snapshot state->lastmark and state->lastindex into the current match
   context.  Expects `state` and `ctx` to be in scope at the call site. */
#define LASTMARK_SAVE() \
    do { \
        ctx->lastindex = state->lastindex; \
        ctx->lastmark = state->lastmark; \
    } while (0)

/* Write the values captured by LASTMARK_SAVE() back into `state`. */
#define LASTMARK_RESTORE() \
    do { \
        state->lastindex = ctx->lastindex; \
        state->lastmark = ctx->lastmark; \
    } while (0)
669
/* Exit helpers for the matcher.
 *
 * RETURN_ERROR(i) returns `i` from the enclosing function immediately.
 * RETURN_FAILURE / RETURN_SUCCESS store 0 / 1 in the local `ret` and jump
 * to the local label `exit`; both must exist in the enclosing function.
 *
 * The RETURN_ON_* forms test a result code first:
 *   RETURN_ON_ERROR(i)    negative       -> RETURN_ERROR(i)
 *   RETURN_ON_SUCCESS(i)  negative       -> RETURN_ERROR(i)
 *                         positive       -> RETURN_SUCCESS
 *   RETURN_ON_FAILURE(i)  negative       -> RETURN_ERROR(i)
 *                         zero           -> RETURN_FAILURE
 *
 * The argument is parenthesized everywhere so that an expression such as
 * `a | b` is compared as a whole.  It is still evaluated more than once,
 * so it must not have side effects.
 */
#define RETURN_ERROR(i) do { return (i); } while (0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while (0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while (0)

#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
680
/* Stringify a macro argument (used for the type names in TRACE output). */
#define SFY(x) #x

/* Data stack primitives.
 *
 * The data stack is the byte buffer state->data_stack; data_stack_base is
 * the offset of the first free byte and data_stack_size the capacity.
 * The macros that may grow the buffer (ALLOC, PUSH) expect these names in
 * the enclosing function: `alloc_pos`, `ctx_pos`, `ctx`.  If
 * data_stack_grow() reports an error (negative value) they return that
 * value from the enclosing function.  After a successful grow, `ctx` is
 * looked up again from `ctx_pos` (unless ctx_pos is -1), because the
 * context itself lives on the data stack.
 *
 * Size and offset values are cast to Py_ssize_t and printed with
 * PY_FORMAT_SIZE_T in the TRACE calls, and pointers are cast to void*,
 * so that the format string matches the argument types.
 */

/* Reserve sizeof(type) bytes at the top of the stack, store their offset
   in `alloc_pos` and their address in `ptr`. */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = (state)->data_stack_base; \
    TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           SFY(type), (Py_ssize_t) alloc_pos, \
           (Py_ssize_t) sizeof(type))); \
    if ((state)->data_stack_size < alloc_pos+sizeof(type)) { \
        int j = data_stack_grow((state), sizeof(type)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT((state), SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    ptr = (type*)((state)->data_stack+alloc_pos); \
    (state)->data_stack_base += sizeof(type); \
} while (0)

/* Point `ptr` at the object of the given type stored at byte offset `pos`. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", \
           SFY(type), (Py_ssize_t) (pos))); \
    ptr = (type*)((state)->data_stack+(pos)); \
} while (0)

/* Copy `size` bytes from `data` onto the top of the stack.  `data` is
   evaluated after any reallocation, so it may refer to `ctx`. */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           (void*)(data), (Py_ssize_t) ((state)->data_stack_base), \
           (Py_ssize_t) (size))); \
    if ((state)->data_stack_size < (state)->data_stack_base+(size)) { \
        int j = data_stack_grow((state), (size)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT((state), SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    memcpy((state)->data_stack+(state)->data_stack_base, (data), (size)); \
    (state)->data_stack_base += (size); \
} while (0)

/* Copy the top `size` bytes of the stack into `data`; the bytes are
   removed from the stack only when `discard` is non-zero. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           (void*)(data), \
           (Py_ssize_t) ((state)->data_stack_base-(size)), \
           (Py_ssize_t) (size))); \
    memcpy((data), (state)->data_stack+(state)->data_stack_base-(size), (size)); \
    if (discard) \
        (state)->data_stack_base -= (size); \
} while (0)

/* Drop the top `size` bytes of the stack without copying them. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
           "(%" PY_FORMAT_SIZE_T "d)\n", \
           (Py_ssize_t) ((state)->data_stack_base-(size)), \
           (Py_ssize_t) (size))); \
    (state)->data_stack_base -= (size); \
} while (0)

/* Shorthands bound to the local `state`; `x` is a pointer to the object
   being pushed/popped and its pointee size is used as the byte count. */
#define DATA_PUSH(x) \
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x) \
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
    DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
744
/* Save/restore the state->mark array on the data stack.
 *
 * Each macro is a no-op unless `lastmark` is greater than zero; otherwise
 * it transfers (lastmark+1) pointer-sized entries of state->mark:
 *   MARK_PUSH          copy the marks onto the data stack
 *   MARK_POP           copy them back and remove them from the stack
 *   MARK_POP_KEEP      copy them back but leave them on the stack
 *   MARK_POP_DISCARD   remove them from the stack without copying
 *
 * `lastmark` is parenthesized so that an expression argument is compared
 * and incremented as a whole.  MARK_PUSH uses the local `i` as scratch.
 */
#define MARK_PUSH(lastmark) \
    do if ((lastmark) > 0) { \
        i = (lastmark); /* ctx->lastmark may change if reallocated */ \
        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
    } while (0)
#define MARK_POP(lastmark) \
    do if ((lastmark) > 0) { \
        DATA_STACK_POP(state, state->mark, ((lastmark)+1)*sizeof(void*), 1); \
    } while (0)
#define MARK_POP_KEEP(lastmark) \
    do if ((lastmark) > 0) { \
        DATA_STACK_POP(state, state->mark, ((lastmark)+1)*sizeof(void*), 0); \
    } while (0)
#define MARK_POP_DISCARD(lastmark) \
    do if ((lastmark) > 0) { \
        DATA_STACK_POP_DISCARD(state, ((lastmark)+1)*sizeof(void*)); \
    } while (0)
762
/* Tags stored in SRE_MATCH_CONTEXT.jump.  DO_JUMP() records one of these
   in every context it creates, naming the jump site that created it;
   JUMP_NONE marks the initial context of an SRE_MATCH() call. */
#define JUMP_NONE            0
#define JUMP_MAX_UNTIL_1     1
#define JUMP_MAX_UNTIL_2     2
#define JUMP_MAX_UNTIL_3     3
#define JUMP_MIN_UNTIL_1     4
#define JUMP_MIN_UNTIL_2     5
#define JUMP_MIN_UNTIL_3     6
#define JUMP_REPEAT          7
#define JUMP_REPEAT_ONE_1    8
#define JUMP_REPEAT_ONE_2    9
#define JUMP_MIN_REPEAT_ONE  10
#define JUMP_BRANCH          11
#define JUMP_ASSERT          12
#define JUMP_ASSERT_NOT      13
777
/* Start matching `nextpattern` in a fresh context instead of recursing.
 *
 * Allocates a new SRE_MATCH_CONTEXT on the data stack, stores in it the
 * position of the current context (last_ctx_pos), the tag `jumpvalue` and
 * the pattern to run, makes it the current context (`ctx`, `ctx_pos`) and
 * jumps to the `entrance` label.  `jumplabel` is defined right after the
 * goto.
 * NOTE(review): presumably the exit path of the enclosing function uses
 * ctx->jump to come back to `jumplabel` -- that code is not part of this
 * macro; confirm there.
 *
 * This expands to several statements and a label, so it must be used as a
 * full statement inside a braced block, followed by ';'.  The definition
 * deliberately ends without a line continuation so that the line after it
 * is not absorbed into the macro.
 */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
    nextctx->last_ctx_pos = ctx_pos; \
    nextctx->jump = (jumpvalue); \
    nextctx->pattern = (nextpattern); \
    ctx_pos = alloc_pos; \
    ctx = nextctx; \
    goto entrance; \
    jumplabel: \
    while (0) /* gcc doesn't like labels at end of scopes */
788
789typedef struct {
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000790 Py_ssize_t last_ctx_pos;
791 Py_ssize_t jump;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000792 SRE_CHAR* ptr;
793 SRE_CODE* pattern;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000794 Py_ssize_t count;
795 Py_ssize_t lastmark;
796 Py_ssize_t lastindex;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000797 union {
798 SRE_CODE chr;
799 SRE_REPEAT* rep;
800 } u;
801} SRE_MATCH_CONTEXT;
802
803/* check if string matches the given pattern. returns <0 for
804 error, 0 for failure, and 1 for success */
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000805LOCAL(Py_ssize_t)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000806SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
Guido van Rossumb700df92000-03-31 14:59:30 +0000807{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000808 SRE_CHAR* end = (SRE_CHAR *)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000809 Py_ssize_t alloc_pos, ctx_pos = -1;
810 Py_ssize_t i, ret = 0;
811 Py_ssize_t jump;
Guido van Rossumb700df92000-03-31 14:59:30 +0000812
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000813 SRE_MATCH_CONTEXT* ctx;
814 SRE_MATCH_CONTEXT* nextctx;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000816 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000817
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000818 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
819 ctx->last_ctx_pos = -1;
820 ctx->jump = JUMP_NONE;
821 ctx->pattern = pattern;
822 ctx_pos = alloc_pos;
823
824entrance:
825
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000826 ctx->ptr = (SRE_CHAR *)state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000827
828 if (ctx->pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000829 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000830 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000831 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000832 TRACE(("reject (got %d chars, need %d)\n",
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000833 (end - ctx->ptr), ctx->pattern[3]));
834 RETURN_FAILURE;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000835 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000836 ctx->pattern += ctx->pattern[1] + 1;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000837 }
838
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000839 for (;;) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000840
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000841 switch (*ctx->pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000842
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000843 case SRE_OP_MARK:
844 /* set mark */
845 /* <MARK> <gid> */
846 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
847 ctx->ptr, ctx->pattern[0]));
848 i = ctx->pattern[0];
849 if (i & 1)
850 state->lastindex = i/2 + 1;
851 if (i > state->lastmark) {
852 /* state->lastmark is the highest valid index in the
853 state->mark array. If it is increased by more than 1,
854 the intervening marks must be set to NULL to signal
Tim Peters3d563502006-01-21 02:47:53 +0000855 that these marks have not been encountered. */
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000856 Py_ssize_t j = state->lastmark + 1;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000857 while (j < i)
858 state->mark[j++] = NULL;
859 state->lastmark = i;
860 }
861 state->mark[i] = ctx->ptr;
862 ctx->pattern++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000863 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000864
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000865 case SRE_OP_LITERAL:
866 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000867 /* <LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000868 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
869 ctx->ptr, *ctx->pattern));
870 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
871 RETURN_FAILURE;
872 ctx->pattern++;
873 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000874 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000875
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000876 case SRE_OP_NOT_LITERAL:
877 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000878 /* <NOT_LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000879 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
880 ctx->ptr, *ctx->pattern));
881 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
882 RETURN_FAILURE;
883 ctx->pattern++;
884 ctx->ptr++;
885 break;
886
887 case SRE_OP_SUCCESS:
888 /* end of pattern */
889 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
890 state->ptr = ctx->ptr;
891 RETURN_SUCCESS;
892
893 case SRE_OP_AT:
894 /* match at given position */
895 /* <AT> <code> */
896 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
897 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
898 RETURN_FAILURE;
899 ctx->pattern++;
900 break;
901
902 case SRE_OP_CATEGORY:
903 /* match at given category */
904 /* <CATEGORY> <code> */
905 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
906 ctx->ptr, *ctx->pattern));
907 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
908 RETURN_FAILURE;
909 ctx->pattern++;
910 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000911 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000912
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000913 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000914 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000915 /* <ANY> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000916 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
917 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
918 RETURN_FAILURE;
919 ctx->ptr++;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000920 break;
921
922 case SRE_OP_ANY_ALL:
923 /* match anything */
924 /* <ANY_ALL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000925 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
926 if (ctx->ptr >= end)
927 RETURN_FAILURE;
928 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000929 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000930
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000931 case SRE_OP_IN:
932 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000933 /* <IN> <skip> <set> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000934 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
935 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
936 RETURN_FAILURE;
937 ctx->pattern += ctx->pattern[0];
938 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000939 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000940
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000941 case SRE_OP_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000942 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
943 ctx->pattern, ctx->ptr, ctx->pattern[0]));
944 if (ctx->ptr >= end ||
945 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
946 RETURN_FAILURE;
947 ctx->pattern++;
948 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000949 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000950
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000951 case SRE_OP_NOT_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000952 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
953 ctx->pattern, ctx->ptr, *ctx->pattern));
954 if (ctx->ptr >= end ||
955 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
956 RETURN_FAILURE;
957 ctx->pattern++;
958 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000959 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000960
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000961 case SRE_OP_IN_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000962 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
963 if (ctx->ptr >= end
964 || !SRE_CHARSET(ctx->pattern+1,
965 (SRE_CODE)state->lower(*ctx->ptr)))
966 RETURN_FAILURE;
967 ctx->pattern += ctx->pattern[0];
968 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000969 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000970
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000971 case SRE_OP_JUMP:
972 case SRE_OP_INFO:
973 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000974 /* <JUMP> <offset> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000975 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
976 ctx->ptr, ctx->pattern[0]));
977 ctx->pattern += ctx->pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000978 break;
979
980 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000981 /* alternation */
982 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000983 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000984 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000985 ctx->u.rep = state->repeat;
986 if (ctx->u.rep)
987 MARK_PUSH(ctx->lastmark);
988 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
989 if (ctx->pattern[1] == SRE_OP_LITERAL &&
990 (ctx->ptr >= end ||
991 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000992 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000993 if (ctx->pattern[1] == SRE_OP_IN &&
994 (ctx->ptr >= end ||
995 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000996 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000997 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000998 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000999 if (ret) {
1000 if (ctx->u.rep)
1001 MARK_POP_DISCARD(ctx->lastmark);
1002 RETURN_ON_ERROR(ret);
1003 RETURN_SUCCESS;
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +00001004 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001005 if (ctx->u.rep)
1006 MARK_POP_KEEP(ctx->lastmark);
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001007 LASTMARK_RESTORE();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001008 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001009 if (ctx->u.rep)
1010 MARK_POP_DISCARD(ctx->lastmark);
1011 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001012
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001013 case SRE_OP_REPEAT_ONE:
1014 /* match repeated sequence (maximizing regexp) */
1015
1016 /* this operator only works if the repeated item is
1017 exactly one character wide, and we're not already
1018 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +00001019 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001020
1021 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1022
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001023 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1024 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001025
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001026 if (ctx->ptr + ctx->pattern[1] > end)
1027 RETURN_FAILURE; /* cannot match */
Fredrik Lundhe1869832000-08-01 22:47:49 +00001028
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001029 state->ptr = ctx->ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001030
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001031 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1032 RETURN_ON_ERROR(ret);
1033 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1034 ctx->count = ret;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001035 ctx->ptr += ctx->count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001036
1037 /* when we arrive here, count contains the number of
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001038 matches, and ctx->ptr points to the tail of the target
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001039 string. check if the rest of the pattern matches,
1040 and backtrack if not. */
1041
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001042 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001043 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001044
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001045 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001046 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001047 state->ptr = ctx->ptr;
1048 RETURN_SUCCESS;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001049 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001050
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001051 LASTMARK_SAVE();
1052
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001053 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001054 /* tail starts with a literal. skip positions where
1055 the rest of the pattern cannot possibly match */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001056 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001057 for (;;) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001058 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001059 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1060 ctx->ptr--;
1061 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001062 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001063 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001064 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001065 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001066 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1067 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001068 if (ret) {
1069 RETURN_ON_ERROR(ret);
1070 RETURN_SUCCESS;
1071 }
Tim Peters3d563502006-01-21 02:47:53 +00001072
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001073 LASTMARK_RESTORE();
Tim Peters3d563502006-01-21 02:47:53 +00001074
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001075 ctx->ptr--;
1076 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001077 }
1078
1079 } else {
1080 /* general case */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001081 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001082 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001083 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1084 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001085 if (ret) {
1086 RETURN_ON_ERROR(ret);
1087 RETURN_SUCCESS;
1088 }
1089 ctx->ptr--;
1090 ctx->count--;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001091 LASTMARK_RESTORE();
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001092 }
1093 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001094 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001095
Guido van Rossum41c99e72003-04-14 17:59:34 +00001096 case SRE_OP_MIN_REPEAT_ONE:
1097 /* match repeated sequence (minimizing regexp) */
1098
1099 /* this operator only works if the repeated item is
1100 exactly one character wide, and we're not already
1101 collecting backtracking points. for other cases,
1102 use the MIN_REPEAT operator */
1103
1104 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1105
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001106 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1107 ctx->pattern[1], ctx->pattern[2]));
Guido van Rossum41c99e72003-04-14 17:59:34 +00001108
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001109 if (ctx->ptr + ctx->pattern[1] > end)
1110 RETURN_FAILURE; /* cannot match */
Guido van Rossum41c99e72003-04-14 17:59:34 +00001111
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001112 state->ptr = ctx->ptr;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001113
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001114 if (ctx->pattern[1] == 0)
1115 ctx->count = 0;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001116 else {
1117 /* count using pattern min as the maximum */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001118 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1119 RETURN_ON_ERROR(ret);
1120 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001121 if (ret < (Py_ssize_t) ctx->pattern[1])
Tim Peters3d563502006-01-21 02:47:53 +00001122 /* didn't match minimum number of times */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001123 RETURN_FAILURE;
1124 /* advance past minimum matches of repeat */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001125 ctx->count = ret;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001126 ctx->ptr += ctx->count;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001127 }
1128
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001129 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Guido van Rossum41c99e72003-04-14 17:59:34 +00001130 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001131 state->ptr = ctx->ptr;
1132 RETURN_SUCCESS;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001133
1134 } else {
1135 /* general case */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001136 LASTMARK_SAVE();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001137 while ((Py_ssize_t)ctx->pattern[2] == 65535
1138 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001139 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001140 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1141 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001142 if (ret) {
1143 RETURN_ON_ERROR(ret);
1144 RETURN_SUCCESS;
1145 }
1146 state->ptr = ctx->ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001147 ret = SRE_COUNT(state, ctx->pattern+3, 1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001148 RETURN_ON_ERROR(ret);
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001149 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001150 if (ret == 0)
Guido van Rossum41c99e72003-04-14 17:59:34 +00001151 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001152 assert(ret == 1);
1153 ctx->ptr++;
1154 ctx->count++;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001155 LASTMARK_RESTORE();
Guido van Rossum41c99e72003-04-14 17:59:34 +00001156 }
Guido van Rossum41c99e72003-04-14 17:59:34 +00001157 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001158 RETURN_FAILURE;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001160 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001161 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001162 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001163 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001164 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1165 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001166
1167 /* install new repeat context */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001168 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
Thomas Wouters89f507f2006-12-13 04:49:30 +00001169 if (!ctx->u.rep) {
1170 PyErr_NoMemory();
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001171 RETURN_FAILURE;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001172 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001173 ctx->u.rep->count = -1;
1174 ctx->u.rep->pattern = ctx->pattern;
1175 ctx->u.rep->prev = state->repeat;
1176 ctx->u.rep->last_ptr = NULL;
1177 state->repeat = ctx->u.rep;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001178
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001179 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001180 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001181 state->repeat = ctx->u.rep->prev;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001182 PyObject_FREE(ctx->u.rep);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001183
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001184 if (ret) {
1185 RETURN_ON_ERROR(ret);
1186 RETURN_SUCCESS;
1187 }
1188 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001189
1190 case SRE_OP_MAX_UNTIL:
1191 /* maximizing repeat */
1192 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1193
1194 /* FIXME: we probably need to deal with zero-width
1195 matches in here... */
1196
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001197 ctx->u.rep = state->repeat;
1198 if (!ctx->u.rep)
1199 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001200
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001201 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001202
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001203 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001204
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001205 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1206 ctx->ptr, ctx->count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001207
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001208 if (ctx->count < ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001209 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001210 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001211 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1212 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001213 if (ret) {
1214 RETURN_ON_ERROR(ret);
1215 RETURN_SUCCESS;
1216 }
1217 ctx->u.rep->count = ctx->count-1;
1218 state->ptr = ctx->ptr;
1219 RETURN_FAILURE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001220 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001221
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001222 if ((ctx->count < ctx->u.rep->pattern[2] ||
1223 ctx->u.rep->pattern[2] == 65535) &&
1224 state->ptr != ctx->u.rep->last_ptr) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001225 /* we may have enough matches, but if we can
1226 match another item, do so */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001227 ctx->u.rep->count = ctx->count;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001228 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001229 MARK_PUSH(ctx->lastmark);
1230 /* zero-width match protection */
1231 DATA_PUSH(&ctx->u.rep->last_ptr);
1232 ctx->u.rep->last_ptr = state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001233 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1234 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001235 DATA_POP(&ctx->u.rep->last_ptr);
1236 if (ret) {
1237 MARK_POP_DISCARD(ctx->lastmark);
1238 RETURN_ON_ERROR(ret);
1239 RETURN_SUCCESS;
1240 }
1241 MARK_POP(ctx->lastmark);
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001242 LASTMARK_RESTORE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001243 ctx->u.rep->count = ctx->count-1;
1244 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001245 }
1246
1247 /* cannot match more repeated items here. make sure the
1248 tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001249 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001250 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001251 RETURN_ON_SUCCESS(ret);
1252 state->repeat = ctx->u.rep;
1253 state->ptr = ctx->ptr;
1254 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001255
1256 case SRE_OP_MIN_UNTIL:
1257 /* minimizing repeat */
1258 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1259
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001260 ctx->u.rep = state->repeat;
1261 if (!ctx->u.rep)
1262 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001263
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001264 state->ptr = ctx->ptr;
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001265
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001266 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001267
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001268 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1269 ctx->ptr, ctx->count, ctx->u.rep->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001270
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001271 if (ctx->count < ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001272 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001273 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001274 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1275 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001276 if (ret) {
1277 RETURN_ON_ERROR(ret);
1278 RETURN_SUCCESS;
1279 }
1280 ctx->u.rep->count = ctx->count-1;
1281 state->ptr = ctx->ptr;
1282 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001283 }
1284
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001285 LASTMARK_SAVE();
1286
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001287 /* see if the tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001288 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001289 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001290 if (ret) {
1291 RETURN_ON_ERROR(ret);
1292 RETURN_SUCCESS;
1293 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001294
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001295 state->repeat = ctx->u.rep;
1296 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001297
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001298 LASTMARK_RESTORE();
1299
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001300 if (ctx->count >= ctx->u.rep->pattern[2]
1301 && ctx->u.rep->pattern[2] != 65535)
1302 RETURN_FAILURE;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001303
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001304 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001305 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1306 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001307 if (ret) {
1308 RETURN_ON_ERROR(ret);
1309 RETURN_SUCCESS;
1310 }
1311 ctx->u.rep->count = ctx->count-1;
1312 state->ptr = ctx->ptr;
1313 RETURN_FAILURE;
1314
1315 case SRE_OP_GROUPREF:
1316 /* match backreference */
1317 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1318 ctx->ptr, ctx->pattern[0]));
1319 i = ctx->pattern[0];
1320 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001321 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001322 if (groupref >= state->lastmark) {
1323 RETURN_FAILURE;
1324 } else {
1325 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1326 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1327 if (!p || !e || e < p)
1328 RETURN_FAILURE;
1329 while (p < e) {
1330 if (ctx->ptr >= end || *ctx->ptr != *p)
1331 RETURN_FAILURE;
1332 p++; ctx->ptr++;
1333 }
1334 }
1335 }
1336 ctx->pattern++;
1337 break;
1338
1339 case SRE_OP_GROUPREF_IGNORE:
1340 /* match backreference */
1341 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1342 ctx->ptr, ctx->pattern[0]));
1343 i = ctx->pattern[0];
1344 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001345 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001346 if (groupref >= state->lastmark) {
1347 RETURN_FAILURE;
1348 } else {
1349 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1350 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1351 if (!p || !e || e < p)
1352 RETURN_FAILURE;
1353 while (p < e) {
1354 if (ctx->ptr >= end ||
1355 state->lower(*ctx->ptr) != state->lower(*p))
1356 RETURN_FAILURE;
1357 p++; ctx->ptr++;
1358 }
1359 }
1360 }
1361 ctx->pattern++;
1362 break;
1363
1364 case SRE_OP_GROUPREF_EXISTS:
1365 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1366 ctx->ptr, ctx->pattern[0]));
1367 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1368 i = ctx->pattern[0];
1369 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001370 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001371 if (groupref >= state->lastmark) {
1372 ctx->pattern += ctx->pattern[1];
1373 break;
1374 } else {
1375 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1376 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1377 if (!p || !e || e < p) {
1378 ctx->pattern += ctx->pattern[1];
1379 break;
1380 }
1381 }
1382 }
1383 ctx->pattern += 2;
1384 break;
1385
1386 case SRE_OP_ASSERT:
1387 /* assert subpattern */
1388 /* <ASSERT> <skip> <back> <pattern> */
1389 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1390 ctx->ptr, ctx->pattern[1]));
1391 state->ptr = ctx->ptr - ctx->pattern[1];
1392 if (state->ptr < state->beginning)
1393 RETURN_FAILURE;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001394 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001395 RETURN_ON_FAILURE(ret);
1396 ctx->pattern += ctx->pattern[0];
1397 break;
1398
1399 case SRE_OP_ASSERT_NOT:
1400 /* assert not subpattern */
1401 /* <ASSERT_NOT> <skip> <back> <pattern> */
1402 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1403 ctx->ptr, ctx->pattern[1]));
1404 state->ptr = ctx->ptr - ctx->pattern[1];
1405 if (state->ptr >= state->beginning) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001406 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001407 if (ret) {
1408 RETURN_ON_ERROR(ret);
1409 RETURN_FAILURE;
1410 }
1411 }
1412 ctx->pattern += ctx->pattern[0];
1413 break;
1414
1415 case SRE_OP_FAILURE:
1416 /* immediate failure */
1417 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1418 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001419
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001420 default:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001421 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1422 ctx->pattern[-1]));
1423 RETURN_ERROR(SRE_ERROR_ILLEGAL);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001424 }
1425 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001426
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001427exit:
1428 ctx_pos = ctx->last_ctx_pos;
1429 jump = ctx->jump;
1430 DATA_POP_DISCARD(ctx);
1431 if (ctx_pos == -1)
1432 return ret;
1433 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1434
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001435 switch (jump) {
1436 case JUMP_MAX_UNTIL_2:
1437 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1438 goto jump_max_until_2;
1439 case JUMP_MAX_UNTIL_3:
1440 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1441 goto jump_max_until_3;
1442 case JUMP_MIN_UNTIL_2:
1443 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1444 goto jump_min_until_2;
1445 case JUMP_MIN_UNTIL_3:
1446 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1447 goto jump_min_until_3;
1448 case JUMP_BRANCH:
1449 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1450 goto jump_branch;
1451 case JUMP_MAX_UNTIL_1:
1452 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1453 goto jump_max_until_1;
1454 case JUMP_MIN_UNTIL_1:
1455 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1456 goto jump_min_until_1;
1457 case JUMP_REPEAT:
1458 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1459 goto jump_repeat;
1460 case JUMP_REPEAT_ONE_1:
1461 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1462 goto jump_repeat_one_1;
1463 case JUMP_REPEAT_ONE_2:
1464 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1465 goto jump_repeat_one_2;
1466 case JUMP_MIN_REPEAT_ONE:
1467 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1468 goto jump_min_repeat_one;
1469 case JUMP_ASSERT:
1470 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1471 goto jump_assert;
1472 case JUMP_ASSERT_NOT:
1473 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1474 goto jump_assert_not;
1475 case JUMP_NONE:
1476 TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
1477 break;
1478 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001479
1480 return ret; /* should never get here */
Guido van Rossumb700df92000-03-31 14:59:30 +00001481}
1482
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001483LOCAL(Py_ssize_t)
Guido van Rossumb700df92000-03-31 14:59:30 +00001484SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1485{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001486 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1487 SRE_CHAR* end = (SRE_CHAR *)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001488 Py_ssize_t status = 0;
1489 Py_ssize_t prefix_len = 0;
1490 Py_ssize_t prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001491 SRE_CODE* prefix = NULL;
1492 SRE_CODE* charset = NULL;
1493 SRE_CODE* overlap = NULL;
1494 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001495
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001496 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001497 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001498 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001499
1500 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001501
Gustavo Niemeyer28b5bb32003-06-26 14:41:08 +00001502 if (pattern[3] > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001503 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001504 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001505 end -= pattern[3]-1;
1506 if (end <= ptr)
1507 end = ptr+1;
1508 }
1509
Fredrik Lundh3562f112000-07-02 12:00:07 +00001510 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001511 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001512 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001513 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001514 prefix_skip = pattern[6];
1515 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001516 overlap = prefix + prefix_len - 1;
1517 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001518 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001519 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001520 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001521
1522 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001523 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001524
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001525 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1526 TRACE(("charset = %p\n", charset));
1527
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001528#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001529 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001530 /* pattern starts with a known prefix. use the overlap
1531 table to skip forward as fast as we possibly can */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001532 Py_ssize_t i = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001533 end = (SRE_CHAR *)state->end;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001534 while (ptr < end) {
1535 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001536 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001537 if (!i)
1538 break;
1539 else
1540 i = overlap[i];
1541 } else {
1542 if (++i == prefix_len) {
1543 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001544 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1545 state->start = ptr + 1 - prefix_len;
1546 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001547 if (flags & SRE_INFO_LITERAL)
1548 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001549 status = SRE_MATCH(state, pattern + 2*prefix_skip);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001550 if (status != 0)
1551 return status;
1552 /* close but no cigar -- try again */
1553 i = overlap[i];
1554 }
1555 break;
1556 }
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001557 }
1558 ptr++;
1559 }
1560 return 0;
1561 }
1562#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001563
Fredrik Lundh3562f112000-07-02 12:00:07 +00001564 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001565 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001566 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001567 SRE_CODE chr = pattern[1];
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001568 end = (SRE_CHAR *)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001569 for (;;) {
1570 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1571 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001572 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001573 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001574 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001575 state->start = ptr;
1576 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001577 if (flags & SRE_INFO_LITERAL)
1578 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001579 status = SRE_MATCH(state, pattern + 2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001580 if (status != 0)
1581 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001582 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001583 } else if (charset) {
1584 /* pattern starts with a character from a known set */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001585 end = (SRE_CHAR *)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001586 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001587 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001588 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001589 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001590 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001591 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001592 state->start = ptr;
1593 state->ptr = ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001594 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001595 if (status != 0)
1596 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001597 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001598 }
1599 } else
1600 /* general case */
1601 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001602 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001603 state->start = state->ptr = ptr++;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001604 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001605 if (status != 0)
1606 break;
1607 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001608
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001609 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001610}
Tim Peters3d563502006-01-21 02:47:53 +00001611
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001612LOCAL(int)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001613SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001614{
1615 /* check if given string is a literal template (i.e. no escapes) */
1616 while (len-- > 0)
1617 if (*ptr++ == '\\')
1618 return 0;
1619 return 1;
1620}
Guido van Rossumb700df92000-03-31 14:59:30 +00001621
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001622#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001623
1624/* -------------------------------------------------------------------- */
1625/* factories and destructors */
1626
1627/* see sre.h for object declarations */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001628static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1629static PyObject*pattern_scanner(PatternObject*, PyObject*);
Guido van Rossumb700df92000-03-31 14:59:30 +00001630
1631static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001632sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +00001633{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001634 return Py_BuildValue("l", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001635}
1636
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001637static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001638sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001639{
1640 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001641 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001642 return NULL;
1643 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001644 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001645 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001646#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001647 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001648#else
1649 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001650#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001651 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001652}
1653
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001654LOCAL(void)
1655state_reset(SRE_STATE* state)
1656{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001657 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001658 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001659
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001660 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001661 state->lastindex = -1;
1662
1663 state->repeat = NULL;
1664
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001665 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001666}
1667
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001668static void*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001669getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001670{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001671 /* given a python object, return a data pointer, a length (in
1672 characters), and a character size. return NULL if the object
1673 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +00001674
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001675 PyBufferProcs *buffer;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001676 Py_ssize_t size, bytes;
1677 int charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001678 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001679
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001680#if defined(HAVE_UNICODE)
1681 if (PyUnicode_Check(string)) {
1682 /* unicode strings doesn't always support the buffer interface */
1683 ptr = (void*) PyUnicode_AS_DATA(string);
1684 bytes = PyUnicode_GET_DATA_SIZE(string);
1685 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001686 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001687
1688 } else {
1689#endif
1690
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001691 /* get pointer to string buffer */
1692 buffer = string->ob_type->tp_as_buffer;
1693 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1694 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001695 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001696 return NULL;
1697 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001698
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001699 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001700 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1701 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001702 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1703 return NULL;
1704 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001705
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001706 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001707#if PY_VERSION_HEX >= 0x01060000
1708 size = PyObject_Size(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001709#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001710 size = PyObject_Length(string);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001711#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001712
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001713 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001714 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001715#if defined(HAVE_UNICODE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001716 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001717 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001718#endif
1719 else {
1720 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1721 return NULL;
1722 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001723
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001724#if defined(HAVE_UNICODE)
1725 }
1726#endif
1727
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001728 *p_length = size;
1729 *p_charsize = charsize;
1730
1731 return ptr;
1732}
1733
1734LOCAL(PyObject*)
1735state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001736 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001737{
1738 /* prepare state object */
1739
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001740 Py_ssize_t length;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001741 int charsize;
1742 void* ptr;
1743
1744 memset(state, 0, sizeof(SRE_STATE));
1745
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001746 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001747 state->lastindex = -1;
1748
1749 ptr = getstring(string, &length, &charsize);
1750 if (!ptr)
1751 return NULL;
1752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001753 /* adjust boundaries */
1754 if (start < 0)
1755 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001756 else if (start > length)
1757 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001759 if (end < 0)
1760 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001761 else if (end > length)
1762 end = length;
1763
1764 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001765
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001766 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001767
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001768 state->start = (void*) ((char*) ptr + start * state->charsize);
1769 state->end = (void*) ((char*) ptr + end * state->charsize);
1770
1771 Py_INCREF(string);
1772 state->string = string;
1773 state->pos = start;
1774 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001775
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001776 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001777 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001778 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001779#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001780 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001781#else
1782 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001783#endif
1784 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001785 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001786
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001787 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001788}
1789
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001790LOCAL(void)
1791state_fini(SRE_STATE* state)
1792{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001793 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001794 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001795}
1796
/* calculate offset from start of string: the byte distance between
   "member" and (state)->beginning, divided by (state)->charsize */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1800
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001801LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001802state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001803{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001804 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +00001805
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001806 index = (index - 1) * 2;
1807
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001808 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001809 if (empty)
1810 /* want empty string */
1811 i = j = 0;
1812 else {
1813 Py_INCREF(Py_None);
1814 return Py_None;
1815 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001816 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001817 i = STATE_OFFSET(state, state->mark[index]);
1818 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001819 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001820
Fredrik Lundh58100642000-08-09 09:14:35 +00001821 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001822}
1823
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001824static void
1825pattern_error(int status)
1826{
1827 switch (status) {
1828 case SRE_ERROR_RECURSION_LIMIT:
1829 PyErr_SetString(
1830 PyExc_RuntimeError,
1831 "maximum recursion limit exceeded"
1832 );
1833 break;
1834 case SRE_ERROR_MEMORY:
1835 PyErr_NoMemory();
1836 break;
1837 default:
1838 /* other error codes indicate compiler/engine bugs */
1839 PyErr_SetString(
1840 PyExc_RuntimeError,
1841 "internal error in regular expression engine"
1842 );
1843 }
1844}
1845
Guido van Rossumb700df92000-03-31 14:59:30 +00001846static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001847pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001848{
Raymond Hettinger027bb632004-05-31 03:09:25 +00001849 if (self->weakreflist != NULL)
1850 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001851 Py_XDECREF(self->pattern);
1852 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001853 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001854 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001855}
1856
1857static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001858pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001859{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001860 SRE_STATE state;
1861 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001862
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001863 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001864 Py_ssize_t start = 0;
1865 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001866 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001867 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001868 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001869 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001870
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001871 string = state_init(&state, self, string, start, end);
1872 if (!string)
1873 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001874
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001875 state.ptr = state.start;
1876
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001877 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1878
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001879 if (state.charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001880 status = sre_match(&state, PatternObject_GetCode(self));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001881 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001882#if defined(HAVE_UNICODE)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001883 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001884#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001885 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001886
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001887 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +00001888 if (PyErr_Occurred())
1889 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001890
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001891 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001892
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001894}
1895
1896static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001897pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001898{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001899 SRE_STATE state;
1900 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001901
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001902 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001903 Py_ssize_t start = 0;
1904 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001905 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001906 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001907 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001908 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001909
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001910 string = state_init(&state, self, string, start, end);
1911 if (!string)
1912 return NULL;
1913
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001914 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1915
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001916 if (state.charsize == 1) {
1917 status = sre_search(&state, PatternObject_GetCode(self));
1918 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001919#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001920 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001921#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001922 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001923
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001924 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1925
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001926 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001927
Thomas Wouters89f507f2006-12-13 04:49:30 +00001928 if (PyErr_Occurred())
1929 return NULL;
1930
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001931 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001932}
1933
1934static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001935call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001936{
1937 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001938 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001939 PyObject* func;
1940 PyObject* result;
1941
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001942 if (!args)
1943 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001944 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001945 if (!name)
1946 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001947 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001948 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001949 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001950 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001951 func = PyObject_GetAttrString(mod, function);
1952 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001953 if (!func)
1954 return NULL;
1955 result = PyObject_CallObject(func, args);
1956 Py_DECREF(func);
1957 Py_DECREF(args);
1958 return result;
1959}
1960
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
 *
 * On success the old reference held in *object is released and the new
 * one stored in its place.  An empty (NULL) slot is left untouched and
 * counts as success.
 *
 * Returns 1 on success, 0 on failure (in which case *object is unchanged).
 */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* copy;

    /* optional slots may be NULL; PyTuple_Pack() must not be handed a
       NULL item, and there is nothing to copy anyway */
    if (!*object)
        return 1;

    copy = call(
        "copy", "deepcopy",
        PyTuple_Pack(2, *object, memo)
        );
    if (!copy)
        return 0;

    Py_DECREF(*object);
    *object = copy;

    return 1; /* success */
}
#endif
1980
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001981static PyObject*
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00001982join_list(PyObject* list, PyObject* pattern)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001983{
1984 /* join list elements */
1985
1986 PyObject* joiner;
1987#if PY_VERSION_HEX >= 0x01060000
1988 PyObject* function;
1989 PyObject* args;
1990#endif
1991 PyObject* result;
1992
1993 switch (PyList_GET_SIZE(list)) {
1994 case 0:
1995 Py_DECREF(list);
Fredrik Lundh09705f02002-11-22 12:46:35 +00001996 return PySequence_GetSlice(pattern, 0, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001997 case 1:
1998 result = PyList_GET_ITEM(list, 0);
1999 Py_INCREF(result);
2000 Py_DECREF(list);
2001 return result;
2002 }
2003
2004 /* two or more elements: slice out a suitable separator from the
2005 first member, and use that to join the entire list */
2006
2007 joiner = PySequence_GetSlice(pattern, 0, 0);
2008 if (!joiner)
2009 return NULL;
2010
2011#if PY_VERSION_HEX >= 0x01060000
2012 function = PyObject_GetAttrString(joiner, "join");
2013 if (!function) {
2014 Py_DECREF(joiner);
2015 return NULL;
2016 }
2017 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002018 if (!args) {
2019 Py_DECREF(function);
2020 Py_DECREF(joiner);
2021 return NULL;
2022 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002023 PyTuple_SET_ITEM(args, 0, list);
2024 result = PyObject_CallObject(function, args);
2025 Py_DECREF(args); /* also removes list */
2026 Py_DECREF(function);
2027#else
2028 result = call(
2029 "string", "join",
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002030 PyTuple_Pack(2, list, joiner)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002031 );
2032#endif
2033 Py_DECREF(joiner);
2034
2035 return result;
2036}
2037
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002038static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002039pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002040{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002041 SRE_STATE state;
2042 PyObject* list;
2043 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002044 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00002045
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002046 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002047 Py_ssize_t start = 0;
2048 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002049 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002050 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00002051 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002052 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002053
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 string = state_init(&state, self, string, start, end);
2055 if (!string)
2056 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002057
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002059 if (!list) {
2060 state_fini(&state);
2061 return NULL;
2062 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002063
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002065
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +00002067
Fredrik Lundhebc37b22000-10-28 19:30:41 +00002068 state_reset(&state);
2069
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002070 state.ptr = state.start;
2071
2072 if (state.charsize == 1) {
2073 status = sre_search(&state, PatternObject_GetCode(self));
2074 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002075#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002076 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002077#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002078 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002079
Thomas Wouters89f507f2006-12-13 04:49:30 +00002080 if (PyErr_Occurred())
2081 goto error;
2082
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002083 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002084 if (status == 0)
2085 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00002086 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002087 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002088 }
Tim Peters3d563502006-01-21 02:47:53 +00002089
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002090 /* don't bother to build a match object */
2091 switch (self->groups) {
2092 case 0:
2093 b = STATE_OFFSET(&state, state.start);
2094 e = STATE_OFFSET(&state, state.ptr);
2095 item = PySequence_GetSlice(string, b, e);
2096 if (!item)
2097 goto error;
2098 break;
2099 case 1:
2100 item = state_getslice(&state, 1, string, 1);
2101 if (!item)
2102 goto error;
2103 break;
2104 default:
2105 item = PyTuple_New(self->groups);
2106 if (!item)
2107 goto error;
2108 for (i = 0; i < self->groups; i++) {
2109 PyObject* o = state_getslice(&state, i+1, string, 1);
2110 if (!o) {
2111 Py_DECREF(item);
2112 goto error;
2113 }
2114 PyTuple_SET_ITEM(item, i, o);
2115 }
2116 break;
2117 }
2118
2119 status = PyList_Append(list, item);
2120 Py_DECREF(item);
2121 if (status < 0)
2122 goto error;
2123
2124 if (state.ptr == state.start)
2125 state.start = (void*) ((char*) state.ptr + state.charsize);
2126 else
2127 state.start = state.ptr;
2128
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002129 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002130
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002131 state_fini(&state);
2132 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00002133
2134error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002135 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002136 state_fini(&state);
2137 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002138
Guido van Rossumb700df92000-03-31 14:59:30 +00002139}
2140
#if PY_VERSION_HEX >= 0x02020000
/* pattern.finditer(...)
 *
 * Creates a scanner for the given arguments and wraps its bound "search"
 * method in a callable-iterator that uses None as the sentinel value.
 *
 * Returns a new iterator, or NULL on failure.
 */
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    PyObject* scanner;
    PyObject* search;
    PyObject* iterator = NULL;

    scanner = pattern_scanner(pattern, args);
    if (scanner) {
        search = PyObject_GetAttrString(scanner, "search");
        Py_DECREF(scanner);
        if (search) {
            iterator = PyCallIter_New(search, Py_None);
            Py_DECREF(search);
        }
    }

    return iterator;
}
#endif
2164
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002165static PyObject*
2166pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2167{
2168 SRE_STATE state;
2169 PyObject* list;
2170 PyObject* item;
2171 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002172 Py_ssize_t n;
2173 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002174 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002175
2176 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002177 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002178 static char* kwlist[] = { "source", "maxsplit", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002179 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002180 &string, &maxsplit))
2181 return NULL;
2182
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002183 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002184 if (!string)
2185 return NULL;
2186
2187 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002188 if (!list) {
2189 state_fini(&state);
2190 return NULL;
2191 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002192
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002193 n = 0;
2194 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002195
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002196 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002197
2198 state_reset(&state);
2199
2200 state.ptr = state.start;
2201
2202 if (state.charsize == 1) {
2203 status = sre_search(&state, PatternObject_GetCode(self));
2204 } else {
2205#if defined(HAVE_UNICODE)
2206 status = sre_usearch(&state, PatternObject_GetCode(self));
2207#endif
2208 }
2209
Thomas Wouters89f507f2006-12-13 04:49:30 +00002210 if (PyErr_Occurred())
2211 goto error;
2212
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002213 if (status <= 0) {
2214 if (status == 0)
2215 break;
2216 pattern_error(status);
2217 goto error;
2218 }
Tim Peters3d563502006-01-21 02:47:53 +00002219
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002220 if (state.start == state.ptr) {
2221 if (last == state.end)
2222 break;
2223 /* skip one character */
2224 state.start = (void*) ((char*) state.ptr + state.charsize);
2225 continue;
2226 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002227
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002228 /* get segment before this match */
2229 item = PySequence_GetSlice(
2230 string, STATE_OFFSET(&state, last),
2231 STATE_OFFSET(&state, state.start)
2232 );
2233 if (!item)
2234 goto error;
2235 status = PyList_Append(list, item);
2236 Py_DECREF(item);
2237 if (status < 0)
2238 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002239
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002240 /* add groups (if any) */
2241 for (i = 0; i < self->groups; i++) {
2242 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002243 if (!item)
2244 goto error;
2245 status = PyList_Append(list, item);
2246 Py_DECREF(item);
2247 if (status < 0)
2248 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002249 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002250
2251 n = n + 1;
2252
2253 last = state.start = state.ptr;
2254
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002255 }
2256
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002257 /* get segment following last match (even if empty) */
2258 item = PySequence_GetSlice(
2259 string, STATE_OFFSET(&state, last), state.endpos
2260 );
2261 if (!item)
2262 goto error;
2263 status = PyList_Append(list, item);
2264 Py_DECREF(item);
2265 if (status < 0)
2266 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002267
2268 state_fini(&state);
2269 return list;
2270
2271error:
2272 Py_DECREF(list);
2273 state_fini(&state);
2274 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002275
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002276}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002277
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002278static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002279pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002280 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002281{
2282 SRE_STATE state;
2283 PyObject* list;
2284 PyObject* item;
2285 PyObject* filter;
2286 PyObject* args;
2287 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002288 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002289 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002290 Py_ssize_t n;
2291 Py_ssize_t i, b, e;
2292 int bint;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002293 int filter_is_callable;
2294
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002295 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002296 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002297 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002298 Py_INCREF(filter);
2299 filter_is_callable = 1;
2300 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002301 /* if not callable, check if it's a literal string */
2302 int literal;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002303 ptr = getstring(ptemplate, &n, &bint);
2304 b = bint;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002305 if (ptr) {
2306 if (b == 1) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002307 literal = sre_literal_template((unsigned char *)ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002308 } else {
2309#if defined(HAVE_UNICODE)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002310 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002311#endif
2312 }
2313 } else {
2314 PyErr_Clear();
2315 literal = 0;
2316 }
2317 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002318 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002319 Py_INCREF(filter);
2320 filter_is_callable = 0;
2321 } else {
2322 /* not a literal; hand it over to the template compiler */
2323 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002324 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002325 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002326 );
2327 if (!filter)
2328 return NULL;
2329 filter_is_callable = PyCallable_Check(filter);
2330 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002331 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002332
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002333 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002334 if (!string) {
2335 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002336 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002337 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002338
2339 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002340 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002341 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002342 state_fini(&state);
2343 return NULL;
2344 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002345
2346 n = i = 0;
2347
2348 while (!count || n < count) {
2349
2350 state_reset(&state);
2351
2352 state.ptr = state.start;
2353
2354 if (state.charsize == 1) {
2355 status = sre_search(&state, PatternObject_GetCode(self));
2356 } else {
2357#if defined(HAVE_UNICODE)
2358 status = sre_usearch(&state, PatternObject_GetCode(self));
2359#endif
2360 }
2361
Thomas Wouters89f507f2006-12-13 04:49:30 +00002362 if (PyErr_Occurred())
2363 goto error;
2364
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002365 if (status <= 0) {
2366 if (status == 0)
2367 break;
2368 pattern_error(status);
2369 goto error;
2370 }
Tim Peters3d563502006-01-21 02:47:53 +00002371
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002372 b = STATE_OFFSET(&state, state.start);
2373 e = STATE_OFFSET(&state, state.ptr);
2374
2375 if (i < b) {
2376 /* get segment before this match */
2377 item = PySequence_GetSlice(string, i, b);
2378 if (!item)
2379 goto error;
2380 status = PyList_Append(list, item);
2381 Py_DECREF(item);
2382 if (status < 0)
2383 goto error;
2384
2385 } else if (i == b && i == e && n > 0)
2386 /* ignore empty match on latest position */
2387 goto next;
2388
2389 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002390 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002391 match = pattern_new_match(self, &state, 1);
2392 if (!match)
2393 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002394 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002395 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002396 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002397 goto error;
2398 }
2399 item = PyObject_CallObject(filter, args);
2400 Py_DECREF(args);
2401 Py_DECREF(match);
2402 if (!item)
2403 goto error;
2404 } else {
2405 /* filter is literal string */
2406 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002407 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002408 }
2409
2410 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002411 if (item != Py_None) {
2412 status = PyList_Append(list, item);
2413 Py_DECREF(item);
2414 if (status < 0)
2415 goto error;
2416 }
Tim Peters3d563502006-01-21 02:47:53 +00002417
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002418 i = e;
2419 n = n + 1;
2420
2421next:
2422 /* move on */
2423 if (state.ptr == state.start)
2424 state.start = (void*) ((char*) state.ptr + state.charsize);
2425 else
2426 state.start = state.ptr;
2427
2428 }
2429
2430 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002431 if (i < state.endpos) {
2432 item = PySequence_GetSlice(string, i, state.endpos);
2433 if (!item)
2434 goto error;
2435 status = PyList_Append(list, item);
2436 Py_DECREF(item);
2437 if (status < 0)
2438 goto error;
2439 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002440
2441 state_fini(&state);
2442
Guido van Rossum4e173842001-12-07 04:25:10 +00002443 Py_DECREF(filter);
2444
Fredrik Lundhdac58492001-10-21 21:48:30 +00002445 /* convert list to single string (also removes list) */
Michael W. Hudsonb6a45052002-07-31 09:54:24 +00002446 item = join_list(list, self->pattern);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002447
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002448 if (!item)
2449 return NULL;
2450
2451 if (subn)
2452 return Py_BuildValue("Ni", item, n);
2453
2454 return item;
2455
2456error:
2457 Py_DECREF(list);
2458 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002459 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002460 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002461
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002462}
2463
2464static PyObject*
2465pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2466{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002467 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002468 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002469 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002470 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002471 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002472 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002473 return NULL;
2474
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002475 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002476}
2477
2478static PyObject*
2479pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2480{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002481 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002482 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002483 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002484 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002485 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002486 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002487 return NULL;
2488
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002489 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002490}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002491
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002492static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002493pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002494{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002495#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002496 PatternObject* copy;
2497 int offset;
2498
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002499 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2500 if (!copy)
2501 return NULL;
2502
2503 offset = offsetof(PatternObject, groups);
2504
2505 Py_XINCREF(self->groupindex);
2506 Py_XINCREF(self->indexgroup);
2507 Py_XINCREF(self->pattern);
2508
2509 memcpy((char*) copy + offset, (char*) self + offset,
2510 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00002511 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002512
2513 return (PyObject*) copy;
2514#else
2515 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2516 return NULL;
2517#endif
2518}
2519
2520static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002521pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002522{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002523#ifdef USE_BUILTIN_COPY
2524 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002525
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002526 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002527 if (!copy)
2528 return NULL;
2529
2530 if (!deepcopy(&copy->groupindex, memo) ||
2531 !deepcopy(&copy->indexgroup, memo) ||
2532 !deepcopy(&copy->pattern, memo)) {
2533 Py_DECREF(copy);
2534 return NULL;
2535 }
2536
2537#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002538 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2539 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002540#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002541}
2542
/* Docstrings for the pattern object and its methods; they are referenced
   from the pattern_methods table and from Pattern_Type's tp_doc slot. */
Raymond Hettinger94478742004-09-24 04:31:19 +00002543PyDoc_STRVAR(pattern_match_doc,
2544"match(string[, pos[, endpos]]) --> match object or None.\n\
2545 Matches zero or more characters at the beginning of the string");
2546
2547PyDoc_STRVAR(pattern_search_doc,
2548"search(string[, pos[, endpos]]) --> match object or None.\n\
2549 Scan through string looking for a match, and return a corresponding\n\
2550 MatchObject instance. Return None if no position in the string matches.");
2551
2552PyDoc_STRVAR(pattern_split_doc,
2553"split(string[, maxsplit = 0]) --> list.\n\
2554 Split string by the occurrences of pattern.");
2555
2556PyDoc_STRVAR(pattern_findall_doc,
2557"findall(string[, pos[, endpos]]) --> list.\n\
2558 Return a list of all non-overlapping matches of pattern in string.");
2559
2560PyDoc_STRVAR(pattern_finditer_doc,
2561"finditer(string[, pos[, endpos]]) --> iterator.\n\
2562 Return an iterator over all non-overlapping matches for the \n\
2563 RE pattern in string. For each match, the iterator returns a\n\
2564 match object.");
2565
2566PyDoc_STRVAR(pattern_sub_doc,
2567"sub(repl, string[, count = 0]) --> newstring\n\
2568 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00002569 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002570
2571PyDoc_STRVAR(pattern_subn_doc,
2572"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2573 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2574 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00002575 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002576
2577PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2578
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002579static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00002580 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002581 pattern_match_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002582 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002583 pattern_search_doc},
2584 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
2585 pattern_sub_doc},
2586 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
2587 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002588 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002589 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002590 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002591 pattern_findall_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002592#if PY_VERSION_HEX >= 0x02020000
Raymond Hettinger94478742004-09-24 04:31:19 +00002593 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
2594 pattern_finditer_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002595#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002596 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002597 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2598 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002599 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002600};
2601
Tim Peters3d563502006-01-21 02:47:53 +00002602static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002603pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002604{
2605 PyObject* res;
2606
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002607 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002608
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002609 if (res)
2610 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002611
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002612 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002613
2614 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002615 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002616 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002617 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002618 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002619
2620 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002621 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002622
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002623 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002624 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002625
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002626 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002627 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002628 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002629 }
2630
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002631 PyErr_SetString(PyExc_AttributeError, name);
2632 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002633}
2634
Neal Norwitz57c179c2006-03-22 07:18:02 +00002635static PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002636 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002637 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002638 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002639 (destructor)pattern_dealloc, /*tp_dealloc*/
2640 0, /*tp_print*/
Raymond Hettinger027bb632004-05-31 03:09:25 +00002641 (getattrfunc)pattern_getattr, /*tp_getattr*/
2642 0, /* tp_setattr */
2643 0, /* tp_compare */
2644 0, /* tp_repr */
2645 0, /* tp_as_number */
2646 0, /* tp_as_sequence */
2647 0, /* tp_as_mapping */
2648 0, /* tp_hash */
2649 0, /* tp_call */
2650 0, /* tp_str */
2651 0, /* tp_getattro */
2652 0, /* tp_setattro */
2653 0, /* tp_as_buffer */
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002654 Py_TPFLAGS_DEFAULT, /* tp_flags */
Raymond Hettinger94478742004-09-24 04:31:19 +00002655 pattern_doc, /* tp_doc */
Raymond Hettinger027bb632004-05-31 03:09:25 +00002656 0, /* tp_traverse */
2657 0, /* tp_clear */
2658 0, /* tp_richcompare */
2659 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002660};
2661
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002662static PyObject *
2663_compile(PyObject* self_, PyObject* args)
2664{
2665 /* "compile" pattern descriptor to pattern object */
2666
2667 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002668 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002669
2670 PyObject* pattern;
2671 int flags = 0;
2672 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002673 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002674 PyObject* groupindex = NULL;
2675 PyObject* indexgroup = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002676 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002677 &PyList_Type, &code, &groups,
2678 &groupindex, &indexgroup))
2679 return NULL;
2680
2681 n = PyList_GET_SIZE(code);
2682
2683 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2684 if (!self)
2685 return NULL;
2686
2687 self->codesize = n;
2688
2689 for (i = 0; i < n; i++) {
2690 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00002691 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002692 self->code[i] = (SRE_CODE) value;
2693 if ((unsigned long) self->code[i] != value) {
2694 PyErr_SetString(PyExc_OverflowError,
2695 "regular expression code size limit exceeded");
2696 break;
2697 }
2698 }
2699
2700 if (PyErr_Occurred()) {
2701 PyObject_DEL(self);
2702 return NULL;
2703 }
2704
2705 Py_INCREF(pattern);
2706 self->pattern = pattern;
2707
2708 self->flags = flags;
2709
2710 self->groups = groups;
2711
2712 Py_XINCREF(groupindex);
2713 self->groupindex = groupindex;
2714
2715 Py_XINCREF(indexgroup);
2716 self->indexgroup = indexgroup;
2717
2718 self->weakreflist = NULL;
2719
2720 return (PyObject*) self;
2721}
2722
Guido van Rossumb700df92000-03-31 14:59:30 +00002723/* -------------------------------------------------------------------- */
2724/* match methods */
2725
2726static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002727match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002728{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002729 Py_XDECREF(self->regs);
2730 Py_XDECREF(self->string);
2731 Py_DECREF(self->pattern);
2732 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002733}
2734
2735static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002736match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002737{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002738 if (index < 0 || index >= self->groups) {
2739 /* raise IndexError if we were given a bad group number */
2740 PyErr_SetString(
2741 PyExc_IndexError,
2742 "no such group"
2743 );
2744 return NULL;
2745 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002746
Fredrik Lundh6f013982000-07-03 18:44:21 +00002747 index *= 2;
2748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002749 if (self->string == Py_None || self->mark[index] < 0) {
2750 /* return default value if the string or group is undefined */
2751 Py_INCREF(def);
2752 return def;
2753 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002755 return PySequence_GetSlice(
2756 self->string, self->mark[index], self->mark[index+1]
2757 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002758}
2759
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002760static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002761match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002762{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002763 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002764
Guido van Rossumddefaf32007-01-14 03:31:43 +00002765 if (index == NULL)
2766 /* Default value */
2767 return 0;
2768
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002769 if (PyInt_Check(index))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002770 return PyInt_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002771
Fredrik Lundh6f013982000-07-03 18:44:21 +00002772 i = -1;
2773
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002774 if (self->pattern->groupindex) {
2775 index = PyObject_GetItem(self->pattern->groupindex, index);
2776 if (index) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002777 if (PyInt_Check(index) || PyLong_Check(index))
2778 i = PyInt_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002779 Py_DECREF(index);
2780 } else
2781 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002782 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002783
2784 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002785}
2786
2787static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002788match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002789{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002790 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002791}
2792
2793static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002794match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002795{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002796 /* delegate to Python code */
2797 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002798 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002799 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002800 );
2801}
2802
2803static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002804match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002805{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002806 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002807 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002808
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002809 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002810
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002811 switch (size) {
2812 case 0:
2813 result = match_getslice(self, Py_False, Py_None);
2814 break;
2815 case 1:
2816 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2817 break;
2818 default:
2819 /* fetch multiple items */
2820 result = PyTuple_New(size);
2821 if (!result)
2822 return NULL;
2823 for (i = 0; i < size; i++) {
2824 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002825 self, PyTuple_GET_ITEM(args, i), Py_None
2826 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002827 if (!item) {
2828 Py_DECREF(result);
2829 return NULL;
2830 }
2831 PyTuple_SET_ITEM(result, i, item);
2832 }
2833 break;
2834 }
2835 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002836}
2837
2838static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002839match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002840{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002841 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002842 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002843
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002844 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002845 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002846 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002847 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002848
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002849 result = PyTuple_New(self->groups-1);
2850 if (!result)
2851 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002852
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002853 for (index = 1; index < self->groups; index++) {
2854 PyObject* item;
2855 item = match_getslice_by_index(self, index, def);
2856 if (!item) {
2857 Py_DECREF(result);
2858 return NULL;
2859 }
2860 PyTuple_SET_ITEM(result, index-1, item);
2861 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002862
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002863 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002864}
2865
2866static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002867match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002868{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002869 PyObject* result;
2870 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002871 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002872
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002873 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002874 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002875 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002876 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002877
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002878 result = PyDict_New();
2879 if (!result || !self->pattern->groupindex)
2880 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002881
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002882 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002883 if (!keys)
2884 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002885
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002886 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002887 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002888 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002889 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002890 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002891 if (!key)
2892 goto failed;
2893 value = match_getslice(self, key, def);
2894 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002895 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002896 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002897 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002898 status = PyDict_SetItem(result, key, value);
2899 Py_DECREF(value);
2900 if (status < 0)
2901 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002902 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002903
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002904 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002905
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002906 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002907
2908failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002909 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002910 Py_DECREF(result);
2911 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002912}
2913
2914static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002915match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002916{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002917 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002918
Guido van Rossumddefaf32007-01-14 03:31:43 +00002919 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002920 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002921 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002922
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002923 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002924
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002925 if (index < 0 || index >= self->groups) {
2926 PyErr_SetString(
2927 PyExc_IndexError,
2928 "no such group"
2929 );
2930 return NULL;
2931 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002932
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002933 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002934 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002935}
2936
2937static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002938match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002939{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002940 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002941
Guido van Rossumddefaf32007-01-14 03:31:43 +00002942 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002943 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002944 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002945
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002946 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002947
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002948 if (index < 0 || index >= self->groups) {
2949 PyErr_SetString(
2950 PyExc_IndexError,
2951 "no such group"
2952 );
2953 return NULL;
2954 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002955
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002956 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002957 return Py_BuildValue("i", self->mark[index*2+1]);
2958}
2959
2960LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002961_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002962{
2963 PyObject* pair;
2964 PyObject* item;
2965
2966 pair = PyTuple_New(2);
2967 if (!pair)
2968 return NULL;
2969
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002970 item = PyInt_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002971 if (!item)
2972 goto error;
2973 PyTuple_SET_ITEM(pair, 0, item);
2974
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002975 item = PyInt_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002976 if (!item)
2977 goto error;
2978 PyTuple_SET_ITEM(pair, 1, item);
2979
2980 return pair;
2981
2982 error:
2983 Py_DECREF(pair);
2984 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002985}
2986
2987static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002988match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002989{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002990 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002991
Guido van Rossumddefaf32007-01-14 03:31:43 +00002992 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002993 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002994 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002995
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002996 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002997
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002998 if (index < 0 || index >= self->groups) {
2999 PyErr_SetString(
3000 PyExc_IndexError,
3001 "no such group"
3002 );
3003 return NULL;
3004 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003005
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003006 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003007 return _pair(self->mark[index*2], self->mark[index*2+1]);
3008}
3009
3010static PyObject*
3011match_regs(MatchObject* self)
3012{
3013 PyObject* regs;
3014 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003015 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003016
3017 regs = PyTuple_New(self->groups);
3018 if (!regs)
3019 return NULL;
3020
3021 for (index = 0; index < self->groups; index++) {
3022 item = _pair(self->mark[index*2], self->mark[index*2+1]);
3023 if (!item) {
3024 Py_DECREF(regs);
3025 return NULL;
3026 }
3027 PyTuple_SET_ITEM(regs, index, item);
3028 }
3029
3030 Py_INCREF(regs);
3031 self->regs = regs;
3032
3033 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00003034}
3035
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003036static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003037match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003038{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003039#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003040 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003041 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00003042
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003043 slots = 2 * (self->pattern->groups+1);
3044
3045 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3046 if (!copy)
3047 return NULL;
3048
3049 /* this value a constant, but any compiler should be able to
3050 figure that out all by itself */
3051 offset = offsetof(MatchObject, string);
3052
3053 Py_XINCREF(self->pattern);
3054 Py_XINCREF(self->string);
3055 Py_XINCREF(self->regs);
3056
3057 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003058 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003059
3060 return (PyObject*) copy;
3061#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003062 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003063 return NULL;
3064#endif
3065}
3066
3067static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003068match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003069{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003070#ifdef USE_BUILTIN_COPY
3071 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00003072
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003073 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003074 if (!copy)
3075 return NULL;
3076
3077 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3078 !deepcopy(&copy->string, memo) ||
3079 !deepcopy(&copy->regs, memo)) {
3080 Py_DECREF(copy);
3081 return NULL;
3082 }
3083
3084#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003085 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3086 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003087#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003088}
3089
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003090static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00003091 {"group", (PyCFunction) match_group, METH_VARARGS},
3092 {"start", (PyCFunction) match_start, METH_VARARGS},
3093 {"end", (PyCFunction) match_end, METH_VARARGS},
3094 {"span", (PyCFunction) match_span, METH_VARARGS},
3095 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
3096 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003097 {"expand", (PyCFunction) match_expand, METH_O},
3098 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3099 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003100 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003101};
3102
Tim Peters3d563502006-01-21 02:47:53 +00003103static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003104match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00003105{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003106 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00003107
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003108 res = Py_FindMethod(match_methods, (PyObject*) self, name);
3109 if (res)
3110 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00003111
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003112 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00003113
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003114 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00003115 if (self->lastindex >= 0)
3116 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00003117 Py_INCREF(Py_None);
3118 return Py_None;
3119 }
3120
3121 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00003122 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00003123 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00003124 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00003125 );
3126 if (result)
3127 return result;
3128 PyErr_Clear();
3129 }
3130 Py_INCREF(Py_None);
3131 return Py_None;
3132 }
3133
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003134 if (!strcmp(name, "string")) {
3135 if (self->string) {
3136 Py_INCREF(self->string);
3137 return self->string;
3138 } else {
3139 Py_INCREF(Py_None);
3140 return Py_None;
3141 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003142 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003143
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003144 if (!strcmp(name, "regs")) {
3145 if (self->regs) {
3146 Py_INCREF(self->regs);
3147 return self->regs;
3148 } else
3149 return match_regs(self);
3150 }
3151
3152 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00003153 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003154 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00003155 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003157 if (!strcmp(name, "pos"))
3158 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003160 if (!strcmp(name, "endpos"))
3161 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00003162
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003163 PyErr_SetString(PyExc_AttributeError, name);
3164 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003165}
3166
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
3169
Neal Norwitz57c179c2006-03-22 07:18:02 +00003170static PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003171 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00003172 0, "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003173 sizeof(MatchObject), sizeof(Py_ssize_t),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003174 (destructor)match_dealloc, /*tp_dealloc*/
3175 0, /*tp_print*/
3176 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00003177};
3178
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003179static PyObject*
3180pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3181{
3182 /* create match object (from state object) */
3183
3184 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003185 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003186 char* base;
3187 int n;
3188
3189 if (status > 0) {
3190
3191 /* create match object (with room for extra group marks) */
3192 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3193 2*(pattern->groups+1));
3194 if (!match)
3195 return NULL;
3196
3197 Py_INCREF(pattern);
3198 match->pattern = pattern;
3199
3200 Py_INCREF(state->string);
3201 match->string = state->string;
3202
3203 match->regs = NULL;
3204 match->groups = pattern->groups+1;
3205
3206 /* fill in group slices */
3207
3208 base = (char*) state->beginning;
3209 n = state->charsize;
3210
3211 match->mark[0] = ((char*) state->start - base) / n;
3212 match->mark[1] = ((char*) state->ptr - base) / n;
3213
3214 for (i = j = 0; i < pattern->groups; i++, j+=2)
3215 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3216 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3217 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3218 } else
3219 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3220
3221 match->pos = state->pos;
3222 match->endpos = state->endpos;
3223
3224 match->lastindex = state->lastindex;
3225
3226 return (PyObject*) match;
3227
3228 } else if (status == 0) {
3229
3230 /* no match */
3231 Py_INCREF(Py_None);
3232 return Py_None;
3233
3234 }
3235
3236 /* internal error */
3237 pattern_error(status);
3238 return NULL;
3239}
3240
3241
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003242/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003243/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003244
3245static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003246scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003247{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003248 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003249 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003250 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003251}
3252
3253static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003254scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003255{
3256 SRE_STATE* state = &self->state;
3257 PyObject* match;
3258 int status;
3259
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003260 state_reset(state);
3261
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003262 state->ptr = state->start;
3263
3264 if (state->charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003265 status = sre_match(state, PatternObject_GetCode(self->pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003266 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003267#if defined(HAVE_UNICODE)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003268 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003269#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003270 }
Thomas Wouters89f507f2006-12-13 04:49:30 +00003271 if (PyErr_Occurred())
3272 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003273
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003274 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003275 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003276
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003277 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003278 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003279 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003280 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003281
3282 return match;
3283}
3284
3285
3286static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003287scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003288{
3289 SRE_STATE* state = &self->state;
3290 PyObject* match;
3291 int status;
3292
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003293 state_reset(state);
3294
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003295 state->ptr = state->start;
3296
3297 if (state->charsize == 1) {
3298 status = sre_search(state, PatternObject_GetCode(self->pattern));
3299 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003300#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003301 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003302#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003303 }
Thomas Wouters89f507f2006-12-13 04:49:30 +00003304 if (PyErr_Occurred())
3305 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003306
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003307 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003308 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003309
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003310 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003311 state->start = (void*) ((char*) state->ptr + state->charsize);
3312 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003313 state->start = state->ptr;
3314
3315 return match;
3316}
3317
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003318static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003319 {"match", (PyCFunction) scanner_match, METH_NOARGS},
3320 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003321 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003322};
3323
Tim Peters3d563502006-01-21 02:47:53 +00003324static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003325scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003326{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003327 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003328
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003329 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
3330 if (res)
3331 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003332
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003333 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003334
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003335 /* attributes */
3336 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003337 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003338 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003339 }
3340
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003341 PyErr_SetString(PyExc_AttributeError, name);
3342 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003343}
3344
Neal Norwitz57c179c2006-03-22 07:18:02 +00003345static PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003346 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00003347 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003348 sizeof(ScannerObject), 0,
3349 (destructor)scanner_dealloc, /*tp_dealloc*/
3350 0, /*tp_print*/
3351 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003352};
3353
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003354static PyObject*
3355pattern_scanner(PatternObject* pattern, PyObject* args)
3356{
3357 /* create search state object */
3358
3359 ScannerObject* self;
3360
3361 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003362 Py_ssize_t start = 0;
3363 Py_ssize_t end = PY_SSIZE_T_MAX;
3364 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003365 return NULL;
3366
3367 /* create scanner object */
3368 self = PyObject_NEW(ScannerObject, &Scanner_Type);
3369 if (!self)
3370 return NULL;
3371
3372 string = state_init(&self->state, pattern, string, start, end);
3373 if (!string) {
3374 PyObject_DEL(self);
3375 return NULL;
3376 }
3377
3378 Py_INCREF(pattern);
3379 self->pattern = (PyObject*) pattern;
3380
3381 return (PyObject*) self;
3382}
3383
Guido van Rossumb700df92000-03-31 14:59:30 +00003384static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00003385 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003386 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00003387 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003388 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003389};
3390
Mark Hammond8235ea12002-07-19 06:55:41 +00003391PyMODINIT_FUNC init_sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00003392{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003393 PyObject* m;
3394 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00003395 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003396
Guido van Rossum50e9fb92006-08-17 05:42:55 +00003397 /* Initialize object types */
3398 if (PyType_Ready(&Pattern_Type) < 0)
3399 return;
3400 if (PyType_Ready(&Match_Type) < 0)
3401 return;
3402 if (PyType_Ready(&Scanner_Type) < 0)
3403 return;
Guido van Rossumb700df92000-03-31 14:59:30 +00003404
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00003405 m = Py_InitModule("_" SRE_MODULE, _functions);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00003406 if (m == NULL)
3407 return;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003408 d = PyModule_GetDict(m);
3409
Fredrik Lundh21009b92001-09-18 18:47:09 +00003410 x = PyInt_FromLong(SRE_MAGIC);
3411 if (x) {
3412 PyDict_SetItemString(d, "MAGIC", x);
3413 Py_DECREF(x);
3414 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003415
Martin v. Löwis78e2f062003-04-19 12:56:08 +00003416 x = PyInt_FromLong(sizeof(SRE_CODE));
3417 if (x) {
3418 PyDict_SetItemString(d, "CODESIZE", x);
3419 Py_DECREF(x);
3420 }
3421
Fredrik Lundh21009b92001-09-18 18:47:09 +00003422 x = PyString_FromString(copyright);
3423 if (x) {
3424 PyDict_SetItemString(d, "copyright", x);
3425 Py_DECREF(x);
3426 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003427}
3428
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003429#endif /* !defined(SRE_RECURSIVE) */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00003430
3431/* vim:ts=4:sw=4:et
3432*/