blob: bf802a686a31e735fdfbb97624cbbe6973eb4b0f [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00007 * 1999-10-24 fl created (based on existing template matcher code)
Fredrik Lundhebc37b22000-10-28 19:30:41 +00008 * 2000-03-06 fl first alpha, sort of
Fredrik Lundhebc37b22000-10-28 19:30:41 +00009 * 2000-08-01 fl fixes for 1.6b1
Fredrik Lundh5644b7f2000-09-21 17:03:25 +000010 * 2000-08-07 fl use PyOS_CheckStack() if available
Fredrik Lundh5644b7f2000-09-21 17:03:25 +000011 * 2000-09-20 fl added expand method
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +000012 * 2001-03-20 fl lots of fixes for 2.1b2
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000013 * 2001-04-15 fl export copyright as Python attribute, not global
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000014 * 2001-04-28 fl added __copy__ methods (work in progress)
Fredrik Lundh09705f02002-11-22 12:46:35 +000015 * 2001-05-14 fl fixes for 1.5.2 compatibility
Fredrik Lundhf71ae462001-07-02 17:04:48 +000016 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
Fredrik Lundh397a6542001-10-18 19:30:16 +000017 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
Fredrik Lundh971e78b2001-10-20 17:48:46 +000018 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
Fredrik Lundhbec95b92001-10-21 16:47:57 +000019 * 2001-10-21 fl added sub/subn primitive
Fredrik Lundh703ce812001-10-24 22:16:30 +000020 * 2001-10-24 fl added finditer primitive (for 2.2 only)
Fredrik Lundh82b23072001-12-09 16:13:15 +000021 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
Fredrik Lundh09705f02002-11-22 12:46:35 +000022 * 2002-11-09 fl fixed empty sub/subn return type
Martin v. Löwis78e2f062003-04-19 12:56:08 +000023 * 2003-04-18 mvl fully support 4-byte codes
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +000024 * 2003-10-17 gn implemented non recursive scheme
Guido van Rossumb700df92000-03-31 14:59:30 +000025 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000026 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000027 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000028 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
31 *
Guido van Rossumb700df92000-03-31 14:59:30 +000032 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000033 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000034 * other compatibility work.
35 */
36
37#ifndef SRE_RECURSIVE
38
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000039static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000040 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000041
Neal Norwitza6d80fa2006-06-12 03:05:40 +000042#define PY_SSIZE_T_CLEAN
43
Guido van Rossumb700df92000-03-31 14:59:30 +000044#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000045#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000046
47#include "sre.h"
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
Fredrik Lundh436c3d52000-06-29 08:58:44 +000051/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000052#if !defined(SRE_MODULE)
53#define SRE_MODULE "sre"
54#endif
Fredrik Lundh436c3d52000-06-29 08:58:44 +000055
Neal Norwitz94a9c092006-03-16 06:30:02 +000056#define SRE_PY_MODULE "re"
57
Guido van Rossumb700df92000-03-31 14:59:30 +000058/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000059#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000060
Fredrik Lundh971e78b2001-10-20 17:48:46 +000061#if PY_VERSION_HEX >= 0x01060000
62#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
Fredrik Lundh22d25462000-07-01 17:50:59 +000063/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +000064#define HAVE_UNICODE
65#endif
Fredrik Lundh971e78b2001-10-20 17:48:46 +000066#endif
Fredrik Lundh436c3d52000-06-29 08:58:44 +000067
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000069/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000070
71/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000072#define USE_FAST_SEARCH
73
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000074/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000075#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000076
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000077/* enables copy/deepcopy handling (work in progress) */
78#undef USE_BUILTIN_COPY
79
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000080#if PY_VERSION_HEX < 0x01060000
81#define PyObject_DEL(op) PyMem_DEL((op))
82#endif
83
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000084/* -------------------------------------------------------------------- */
85
Fredrik Lundh80946112000-06-29 18:03:25 +000086#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000087#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000088#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000089/* fastest possible local call under MSVC */
90#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000092#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000093#else
94#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000095#endif
96
97/* error codes */
98#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000099#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000100#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +0000101#define SRE_ERROR_MEMORY -9 /* out of memory */
Facundo Batista4473d222008-01-08 21:10:12 +0000102#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +0000103
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000104#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000105#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000106#else
107#define TRACE(v)
108#endif
109
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000110/* -------------------------------------------------------------------- */
111/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000112
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000113/* default character predicates (run sre_chars.py to regenerate tables) */
114
115#define SRE_DIGIT_MASK 1
116#define SRE_SPACE_MASK 2
117#define SRE_LINEBREAK_MASK 4
118#define SRE_ALNUM_MASK 8
119#define SRE_WORD_MASK 16
120
Fredrik Lundh21009b92001-09-18 18:47:09 +0000121/* FIXME: this assumes ASCII. create tables in init_sre() instead */
122
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000123static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
1242, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
1250, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
12625, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
12724, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
1280, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
12924, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
130
Fredrik Lundhb389df32000-06-29 12:48:37 +0000131static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
Fredrik Lundh436c3d52000-06-29 08:58:44 +000013210, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
13327, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
13444, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
13561, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
136108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
137122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
138106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
139120, 121, 122, 123, 124, 125, 126, 127 };
140
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000141#define SRE_IS_DIGIT(ch)\
142 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
143#define SRE_IS_SPACE(ch)\
144 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
145#define SRE_IS_LINEBREAK(ch)\
146 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
147#define SRE_IS_ALNUM(ch)\
148 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
149#define SRE_IS_WORD(ch)\
150 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000151
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000152static unsigned int sre_lower(unsigned int ch)
153{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000154 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000155}
156
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000157/* locale-specific character predicates */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000158/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
159 * warnings when c's type supports only numbers < N+1 */
160#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
161#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000162#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000163#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000164#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
165
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000166static unsigned int sre_lower_locale(unsigned int ch)
167{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000168 return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000169}
170
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000171/* unicode-specific character predicates */
172
173#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000174
Mark Dickinsonfe67bd92009-07-28 20:35:03 +0000175#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000176#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
177#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
Fredrik Lundh22d25462000-07-01 17:50:59 +0000178#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000180
181static unsigned int sre_lower_unicode(unsigned int ch)
182{
183 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
184}
185
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000186#endif
187
Guido van Rossumb700df92000-03-31 14:59:30 +0000188LOCAL(int)
189sre_category(SRE_CODE category, unsigned int ch)
190{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000191 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000192
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000193 case SRE_CATEGORY_DIGIT:
194 return SRE_IS_DIGIT(ch);
195 case SRE_CATEGORY_NOT_DIGIT:
196 return !SRE_IS_DIGIT(ch);
197 case SRE_CATEGORY_SPACE:
198 return SRE_IS_SPACE(ch);
199 case SRE_CATEGORY_NOT_SPACE:
200 return !SRE_IS_SPACE(ch);
201 case SRE_CATEGORY_WORD:
202 return SRE_IS_WORD(ch);
203 case SRE_CATEGORY_NOT_WORD:
204 return !SRE_IS_WORD(ch);
205 case SRE_CATEGORY_LINEBREAK:
206 return SRE_IS_LINEBREAK(ch);
207 case SRE_CATEGORY_NOT_LINEBREAK:
208 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000209
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000210 case SRE_CATEGORY_LOC_WORD:
211 return SRE_LOC_IS_WORD(ch);
212 case SRE_CATEGORY_LOC_NOT_WORD:
213 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000214
215#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000216 case SRE_CATEGORY_UNI_DIGIT:
217 return SRE_UNI_IS_DIGIT(ch);
218 case SRE_CATEGORY_UNI_NOT_DIGIT:
219 return !SRE_UNI_IS_DIGIT(ch);
220 case SRE_CATEGORY_UNI_SPACE:
221 return SRE_UNI_IS_SPACE(ch);
222 case SRE_CATEGORY_UNI_NOT_SPACE:
223 return !SRE_UNI_IS_SPACE(ch);
224 case SRE_CATEGORY_UNI_WORD:
225 return SRE_UNI_IS_WORD(ch);
226 case SRE_CATEGORY_UNI_NOT_WORD:
227 return !SRE_UNI_IS_WORD(ch);
228 case SRE_CATEGORY_UNI_LINEBREAK:
229 return SRE_UNI_IS_LINEBREAK(ch);
230 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
231 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000232#else
233 case SRE_CATEGORY_UNI_DIGIT:
234 return SRE_IS_DIGIT(ch);
235 case SRE_CATEGORY_UNI_NOT_DIGIT:
236 return !SRE_IS_DIGIT(ch);
237 case SRE_CATEGORY_UNI_SPACE:
238 return SRE_IS_SPACE(ch);
239 case SRE_CATEGORY_UNI_NOT_SPACE:
240 return !SRE_IS_SPACE(ch);
241 case SRE_CATEGORY_UNI_WORD:
242 return SRE_LOC_IS_WORD(ch);
243 case SRE_CATEGORY_UNI_NOT_WORD:
244 return !SRE_LOC_IS_WORD(ch);
245 case SRE_CATEGORY_UNI_LINEBREAK:
246 return SRE_IS_LINEBREAK(ch);
247 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
248 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000249#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000250 }
251 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000252}
253
254/* helpers */
255
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000256static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000257data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000258{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000259 if (state->data_stack) {
Jack Diederich2d400772006-05-27 15:44:34 +0000260 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000261 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000262 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000263 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000264}
265
266static int
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000267data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000268{
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000269 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000270 minsize = state->data_stack_base+size;
271 cursize = state->data_stack_size;
272 if (cursize < minsize) {
273 void* stack;
274 cursize = minsize+minsize/4+1024;
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300275 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Jack Diederich2d400772006-05-27 15:44:34 +0000276 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000277 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000278 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000279 return SRE_ERROR_MEMORY;
280 }
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000281 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000282 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000283 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000284 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000285}
286
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000287/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000288
289#define SRE_CHAR unsigned char
290#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000291#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000292#define SRE_CHARSET sre_charset
293#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000294#define SRE_MATCH sre_match
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000295#define SRE_MATCH_CONTEXT sre_match_context
Guido van Rossumb700df92000-03-31 14:59:30 +0000296#define SRE_SEARCH sre_search
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000297#define SRE_LITERAL_TEMPLATE sre_literal_template
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000298
299#if defined(HAVE_UNICODE)
300
Guido van Rossumb700df92000-03-31 14:59:30 +0000301#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000302#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000303#undef SRE_RECURSIVE
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000304
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000305#undef SRE_LITERAL_TEMPLATE
Guido van Rossumb700df92000-03-31 14:59:30 +0000306#undef SRE_SEARCH
307#undef SRE_MATCH
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308#undef SRE_MATCH_CONTEXT
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000309#undef SRE_INFO
310#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000311#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000312#undef SRE_AT
313#undef SRE_CHAR
314
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000315/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000316
317#define SRE_CHAR Py_UNICODE
318#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000319#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000320#define SRE_CHARSET sre_ucharset
321#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000322#define SRE_MATCH sre_umatch
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000323#define SRE_MATCH_CONTEXT sre_umatch_context
Guido van Rossumb700df92000-03-31 14:59:30 +0000324#define SRE_SEARCH sre_usearch
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000325#define SRE_LITERAL_TEMPLATE sre_uliteral_template
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000326#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000327
328#endif /* SRE_RECURSIVE */
329
330/* -------------------------------------------------------------------- */
331/* String matching engine */
332
333/* the following section is compiled twice, with different character
334 settings */
335
336LOCAL(int)
337SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
338{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000339 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000341 Py_ssize_t thisp, thatp;
Guido van Rossumb700df92000-03-31 14:59:30 +0000342
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000343 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000344
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000345 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000346 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000347 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000348
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000349 case SRE_AT_BEGINNING_LINE:
350 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000351 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000354 return (((void*) (ptr+1) == state->end &&
355 SRE_IS_LINEBREAK((int) ptr[0])) ||
356 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000357
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000358 case SRE_AT_END_LINE:
359 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000360 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000361
Fredrik Lundh770617b2001-01-14 15:06:11 +0000362 case SRE_AT_END_STRING:
363 return ((void*) ptr == state->end);
364
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000365 case SRE_AT_BOUNDARY:
366 if (state->beginning == state->end)
367 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000368 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000369 SRE_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000370 thisp = ((void*) ptr < state->end) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000371 SRE_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000372 return thisp != thatp;
Fredrik Lundh80946112000-06-29 18:03:25 +0000373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000374 case SRE_AT_NON_BOUNDARY:
375 if (state->beginning == state->end)
376 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000377 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000378 SRE_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000379 thisp = ((void*) ptr < state->end) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000380 SRE_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000381 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000382
383 case SRE_AT_LOC_BOUNDARY:
384 if (state->beginning == state->end)
385 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000386 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000387 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000388 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000389 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000390 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000391
392 case SRE_AT_LOC_NON_BOUNDARY:
393 if (state->beginning == state->end)
394 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000395 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000396 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000397 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000398 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000399 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000400
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000401#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000402 case SRE_AT_UNI_BOUNDARY:
403 if (state->beginning == state->end)
404 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000405 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000406 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000407 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000408 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000409 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000410
411 case SRE_AT_UNI_NON_BOUNDARY:
412 if (state->beginning == state->end)
413 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000414 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000415 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000416 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000417 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000418 return thisp == thatp;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000419#endif
420
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000421 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000423 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000424}
425
426LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000427SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000428{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000430
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000431 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 for (;;) {
434 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000435
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000436 case SRE_OP_FAILURE:
437 return !ok;
438
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000439 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000440 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000441 if (ch == set[0])
442 return ok;
443 set++;
444 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000445
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000446 case SRE_OP_CATEGORY:
447 /* <CATEGORY> <code> */
448 if (sre_category(set[0], (int) ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000449 return ok;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000450 set += 1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000451 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000452
Fredrik Lundh3562f112000-07-02 12:00:07 +0000453 case SRE_OP_CHARSET:
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000454 if (sizeof(SRE_CODE) == 2) {
455 /* <CHARSET> <bitmap> (16 bits per code word) */
456 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
457 return ok;
458 set += 16;
Tim Peters3d563502006-01-21 02:47:53 +0000459 }
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000460 else {
461 /* <CHARSET> <bitmap> (32 bits per code word) */
Gregory P. Smith64ab35e2012-12-10 17:45:54 -0800462 if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31))))
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000463 return ok;
464 set += 8;
465 }
Fredrik Lundh3562f112000-07-02 12:00:07 +0000466 break;
467
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000468 case SRE_OP_RANGE:
469 /* <RANGE> <lower> <upper> */
470 if (set[0] <= ch && ch <= set[1])
471 return ok;
472 set += 2;
473 break;
474
475 case SRE_OP_NEGATE:
476 ok = !ok;
477 break;
478
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000479 case SRE_OP_BIGCHARSET:
480 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
481 {
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000482 Py_ssize_t count, block;
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000483 count = *(set++);
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000484
485 if (sizeof(SRE_CODE) == 2) {
486 block = ((unsigned char*)set)[ch >> 8];
487 set += 128;
488 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
489 return ok;
490 set += count*16;
491 }
492 else {
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000493 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
494 * warnings when c's type supports only numbers < N+1 */
495 if (!(ch & ~65535))
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000496 block = ((unsigned char*)set)[ch >> 8];
497 else
498 block = -1;
499 set += 64;
Tim Peters3d563502006-01-21 02:47:53 +0000500 if (block >=0 &&
Gregory P. Smith64ab35e2012-12-10 17:45:54 -0800501 (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31))))
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000502 return ok;
503 set += count*8;
504 }
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000505 break;
506 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000507
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000508 default:
509 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000510 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000511 return 0;
512 }
513 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000514}
515
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000516LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000517
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000518LOCAL(Py_ssize_t)
519SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000520{
521 SRE_CODE chr;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000522 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
523 SRE_CHAR* end = (SRE_CHAR *)state->end;
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000524 Py_ssize_t i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000525
526 /* adjust end */
Serhiy Storchakae18e05c2013-02-16 16:47:15 +0200527 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000528 end = ptr + maxcount;
529
530 switch (pattern[0]) {
531
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000532 case SRE_OP_IN:
533 /* repeated set */
534 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
535 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
536 ptr++;
537 break;
538
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000539 case SRE_OP_ANY:
540 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000541 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000542 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
543 ptr++;
544 break;
545
546 case SRE_OP_ANY_ALL:
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000547 /* repeated dot wildcard. skip to the end of the target
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000548 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000549 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000550 ptr = end;
551 break;
552
553 case SRE_OP_LITERAL:
554 /* repeated literal */
555 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000556 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000557 while (ptr < end && (SRE_CODE) *ptr == chr)
558 ptr++;
559 break;
560
561 case SRE_OP_LITERAL_IGNORE:
562 /* repeated literal */
563 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000564 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000565 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
566 ptr++;
567 break;
568
569 case SRE_OP_NOT_LITERAL:
570 /* repeated non-literal */
571 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000572 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000573 while (ptr < end && (SRE_CODE) *ptr != chr)
574 ptr++;
575 break;
Tim Peters3d563502006-01-21 02:47:53 +0000576
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000577 case SRE_OP_NOT_LITERAL_IGNORE:
578 /* repeated non-literal */
579 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000581 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
582 ptr++;
583 break;
584
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000585 default:
586 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000587 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000588 while ((SRE_CHAR*) state->ptr < end) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000589 i = SRE_MATCH(state, pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000590 if (i < 0)
591 return i;
592 if (!i)
593 break;
594 }
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300595 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000596 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000597 return (SRE_CHAR*) state->ptr - ptr;
598 }
599
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300600 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
601 ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000602 return ptr - (SRE_CHAR*) state->ptr;
603}
604
Fredrik Lundh33accc12000-08-27 20:59:47 +0000605#if 0 /* not used in this release */
Guido van Rossumb700df92000-03-31 14:59:30 +0000606LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000607SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
608{
609 /* check if an SRE_OP_INFO block matches at the current position.
610 returns the number of SRE_CODE objects to skip if successful, 0
611 if no match */
612
613 SRE_CHAR* end = state->end;
614 SRE_CHAR* ptr = state->ptr;
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000615 Py_ssize_t i;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000616
617 /* check minimal length */
618 if (pattern[3] && (end - ptr) < pattern[3])
619 return 0;
620
621 /* check known prefix */
622 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
623 /* <length> <skip> <prefix data> <overlap data> */
624 for (i = 0; i < pattern[5]; i++)
625 if ((SRE_CODE) ptr[i] != pattern[7 + i])
626 return 0;
627 return pattern[0] + 2 * pattern[6];
628 }
629 return pattern[0];
630}
Fredrik Lundh33accc12000-08-27 20:59:47 +0000631#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000632
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +0000633/* The macros below should be used to protect recursive SRE_MATCH()
634 * calls that *failed* and do *not* return immediately (IOW, those
635 * that will backtrack). Explaining:
636 *
637 * - Recursive SRE_MATCH() returned true: that's usually a success
638 * (besides atypical cases like ASSERT_NOT), therefore there's no
639 * reason to restore lastmark;
640 *
641 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
642 * is returning to the caller: If the current SRE_MATCH() is the
643 * top function of the recursion, returning false will be a matching
644 * failure, and it doesn't matter where lastmark is pointing to.
645 * If it's *not* the top function, it will be a recursive SRE_MATCH()
646 * failure by itself, and the calling SRE_MATCH() will have to deal
647 * with the failure by the same rules explained here (it will restore
648 * lastmark by itself if necessary);
649 *
650 * - Recursive SRE_MATCH() returned false, and will continue the
651 * outside 'for' loop: must be protected when breaking, since the next
652 * OP could potentially depend on lastmark;
Tim Peters3d563502006-01-21 02:47:53 +0000653 *
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +0000654 * - Recursive SRE_MATCH() returned false, and will be called again
655 * inside a local for/while loop: must be protected between each
656 * loop iteration, since the recursive SRE_MATCH() could do anything,
657 * and could potentially depend on lastmark.
658 *
659 * For more information, check the discussion at SF patch #712900.
660 */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000661#define LASTMARK_SAVE() \
662 do { \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000663 ctx->lastmark = state->lastmark; \
664 ctx->lastindex = state->lastindex; \
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000665 } while (0)
666#define LASTMARK_RESTORE() \
667 do { \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000668 state->lastmark = ctx->lastmark; \
669 state->lastindex = ctx->lastindex; \
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000670 } while (0)
671
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000672#define RETURN_ERROR(i) do { return i; } while(0)
673#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
674#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
675
676#define RETURN_ON_ERROR(i) \
677 do { if (i < 0) RETURN_ERROR(i); } while (0)
678#define RETURN_ON_SUCCESS(i) \
679 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
680#define RETURN_ON_FAILURE(i) \
681 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
682
683#define SFY(x) #x
684
685#define DATA_STACK_ALLOC(state, type, ptr) \
686do { \
687 alloc_pos = state->data_stack_base; \
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300688 TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
689 "(%" PY_FORMAT_SIZE_T "d)\n", \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000690 SFY(type), alloc_pos, sizeof(type))); \
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +0300691 if (sizeof(type) > state->data_stack_size - alloc_pos) { \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000692 int j = data_stack_grow(state, sizeof(type)); \
693 if (j < 0) return j; \
694 if (ctx_pos != -1) \
695 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
696 } \
697 ptr = (type*)(state->data_stack+alloc_pos); \
698 state->data_stack_base += sizeof(type); \
699} while (0)
700
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000701#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
702do { \
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300703 TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000704 ptr = (type*)(state->data_stack+pos); \
705} while (0)
706
707#define DATA_STACK_PUSH(state, data, size) \
708do { \
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300709 TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
710 "(%" PY_FORMAT_SIZE_T "d)\n", \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000711 data, state->data_stack_base, size)); \
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +0300712 if (size > state->data_stack_size - state->data_stack_base) { \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000713 int j = data_stack_grow(state, size); \
714 if (j < 0) return j; \
715 if (ctx_pos != -1) \
716 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
717 } \
718 memcpy(state->data_stack+state->data_stack_base, data, size); \
719 state->data_stack_base += size; \
720} while (0)
721
722#define DATA_STACK_POP(state, data, size, discard) \
723do { \
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300724 TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
725 "(%" PY_FORMAT_SIZE_T "d)\n", \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000726 data, state->data_stack_base-size, size)); \
727 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
728 if (discard) \
729 state->data_stack_base -= size; \
730} while (0)
731
732#define DATA_STACK_POP_DISCARD(state, size) \
733do { \
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300734 TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
735 "(%" PY_FORMAT_SIZE_T "d)\n", \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000736 state->data_stack_base-size, size)); \
737 state->data_stack_base -= size; \
738} while(0)
739
740#define DATA_PUSH(x) \
741 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
742#define DATA_POP(x) \
743 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000744#define DATA_POP_DISCARD(x) \
745 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
746#define DATA_ALLOC(t,p) \
747 DATA_STACK_ALLOC(state, t, p)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000748#define DATA_LOOKUP_AT(t,p,pos) \
749 DATA_STACK_LOOKUP_AT(state,t,p,pos)
750
751#define MARK_PUSH(lastmark) \
752 do if (lastmark > 0) { \
753 i = lastmark; /* ctx->lastmark may change if reallocated */ \
754 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
755 } while (0)
756#define MARK_POP(lastmark) \
757 do if (lastmark > 0) { \
758 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
759 } while (0)
760#define MARK_POP_KEEP(lastmark) \
761 do if (lastmark > 0) { \
762 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
763 } while (0)
764#define MARK_POP_DISCARD(lastmark) \
765 do if (lastmark > 0) { \
766 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
767 } while (0)
768
769#define JUMP_NONE 0
770#define JUMP_MAX_UNTIL_1 1
771#define JUMP_MAX_UNTIL_2 2
772#define JUMP_MAX_UNTIL_3 3
773#define JUMP_MIN_UNTIL_1 4
774#define JUMP_MIN_UNTIL_2 5
775#define JUMP_MIN_UNTIL_3 6
776#define JUMP_REPEAT 7
777#define JUMP_REPEAT_ONE_1 8
778#define JUMP_REPEAT_ONE_2 9
779#define JUMP_MIN_REPEAT_ONE 10
780#define JUMP_BRANCH 11
781#define JUMP_ASSERT 12
782#define JUMP_ASSERT_NOT 13
783
784#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
785 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
786 nextctx->last_ctx_pos = ctx_pos; \
787 nextctx->jump = jumpvalue; \
788 nextctx->pattern = nextpattern; \
789 ctx_pos = alloc_pos; \
790 ctx = nextctx; \
791 goto entrance; \
792 jumplabel: \
793 while (0) /* gcc doesn't like labels at end of scopes */ \
794
795typedef struct {
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000796 Py_ssize_t last_ctx_pos;
797 Py_ssize_t jump;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000798 SRE_CHAR* ptr;
799 SRE_CODE* pattern;
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000800 Py_ssize_t count;
801 Py_ssize_t lastmark;
802 Py_ssize_t lastindex;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000803 union {
804 SRE_CODE chr;
805 SRE_REPEAT* rep;
806 } u;
807} SRE_MATCH_CONTEXT;
808
809/* check if string matches the given pattern. returns <0 for
810 error, 0 for failure, and 1 for success */
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000811LOCAL(Py_ssize_t)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000812SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
Guido van Rossumb700df92000-03-31 14:59:30 +0000813{
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000814 SRE_CHAR* end = (SRE_CHAR *)state->end;
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000815 Py_ssize_t alloc_pos, ctx_pos = -1;
816 Py_ssize_t i, ret = 0;
817 Py_ssize_t jump;
Facundo Batista4473d222008-01-08 21:10:12 +0000818 unsigned int sigcount=0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000819
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000820 SRE_MATCH_CONTEXT* ctx;
821 SRE_MATCH_CONTEXT* nextctx;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000822
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000823 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000824
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000825 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
826 ctx->last_ctx_pos = -1;
827 ctx->jump = JUMP_NONE;
828 ctx->pattern = pattern;
829 ctx_pos = alloc_pos;
830
831entrance:
832
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000833 ctx->ptr = (SRE_CHAR *)state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000834
835 if (ctx->pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000836 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000837 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000838 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
Serhiy Storchakacf29ba82013-09-05 18:02:57 +0300839 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
840 "need %" PY_FORMAT_SIZE_T "d)\n",
841 (end - ctx->ptr), (Py_ssize_t) ctx->pattern[3]));
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000842 RETURN_FAILURE;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000843 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000844 ctx->pattern += ctx->pattern[1] + 1;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000845 }
846
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000847 for (;;) {
Facundo Batista4473d222008-01-08 21:10:12 +0000848 ++sigcount;
849 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
850 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
Guido van Rossumb700df92000-03-31 14:59:30 +0000851
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000852 switch (*ctx->pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000853
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000854 case SRE_OP_MARK:
855 /* set mark */
856 /* <MARK> <gid> */
857 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
858 ctx->ptr, ctx->pattern[0]));
859 i = ctx->pattern[0];
860 if (i & 1)
861 state->lastindex = i/2 + 1;
862 if (i > state->lastmark) {
863 /* state->lastmark is the highest valid index in the
864 state->mark array. If it is increased by more than 1,
865 the intervening marks must be set to NULL to signal
Tim Peters3d563502006-01-21 02:47:53 +0000866 that these marks have not been encountered. */
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000867 Py_ssize_t j = state->lastmark + 1;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000868 while (j < i)
869 state->mark[j++] = NULL;
870 state->lastmark = i;
871 }
872 state->mark[i] = ctx->ptr;
873 ctx->pattern++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000874 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000875
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000876 case SRE_OP_LITERAL:
877 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000878 /* <LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000879 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
880 ctx->ptr, *ctx->pattern));
881 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
882 RETURN_FAILURE;
883 ctx->pattern++;
884 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000885 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000886
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000887 case SRE_OP_NOT_LITERAL:
888 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000889 /* <NOT_LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000890 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
891 ctx->ptr, *ctx->pattern));
892 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
893 RETURN_FAILURE;
894 ctx->pattern++;
895 ctx->ptr++;
896 break;
897
898 case SRE_OP_SUCCESS:
899 /* end of pattern */
900 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
901 state->ptr = ctx->ptr;
902 RETURN_SUCCESS;
903
904 case SRE_OP_AT:
905 /* match at given position */
906 /* <AT> <code> */
907 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
908 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
909 RETURN_FAILURE;
910 ctx->pattern++;
911 break;
912
913 case SRE_OP_CATEGORY:
914 /* match at given category */
915 /* <CATEGORY> <code> */
916 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
917 ctx->ptr, *ctx->pattern));
918 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
919 RETURN_FAILURE;
920 ctx->pattern++;
921 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000922 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000923
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000924 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000925 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000926 /* <ANY> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000927 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
928 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
929 RETURN_FAILURE;
930 ctx->ptr++;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000931 break;
932
933 case SRE_OP_ANY_ALL:
934 /* match anything */
935 /* <ANY_ALL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000936 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
937 if (ctx->ptr >= end)
938 RETURN_FAILURE;
939 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000940 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000941
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000942 case SRE_OP_IN:
943 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000944 /* <IN> <skip> <set> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000945 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
946 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
947 RETURN_FAILURE;
948 ctx->pattern += ctx->pattern[0];
949 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000950 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000951
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000952 case SRE_OP_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000953 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
954 ctx->pattern, ctx->ptr, ctx->pattern[0]));
955 if (ctx->ptr >= end ||
956 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
957 RETURN_FAILURE;
958 ctx->pattern++;
959 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000960 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000961
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000962 case SRE_OP_NOT_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000963 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
964 ctx->pattern, ctx->ptr, *ctx->pattern));
965 if (ctx->ptr >= end ||
966 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
967 RETURN_FAILURE;
968 ctx->pattern++;
969 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000970 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000971
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000972 case SRE_OP_IN_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000973 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
974 if (ctx->ptr >= end
975 || !SRE_CHARSET(ctx->pattern+1,
976 (SRE_CODE)state->lower(*ctx->ptr)))
977 RETURN_FAILURE;
978 ctx->pattern += ctx->pattern[0];
979 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000980 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000981
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000982 case SRE_OP_JUMP:
983 case SRE_OP_INFO:
984 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000985 /* <JUMP> <offset> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000986 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
987 ctx->ptr, ctx->pattern[0]));
988 ctx->pattern += ctx->pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000989 break;
990
991 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000992 /* alternation */
993 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000994 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000995 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000996 ctx->u.rep = state->repeat;
997 if (ctx->u.rep)
998 MARK_PUSH(ctx->lastmark);
999 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
1000 if (ctx->pattern[1] == SRE_OP_LITERAL &&
1001 (ctx->ptr >= end ||
1002 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001003 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001004 if (ctx->pattern[1] == SRE_OP_IN &&
1005 (ctx->ptr >= end ||
1006 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001007 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001008 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001009 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001010 if (ret) {
1011 if (ctx->u.rep)
1012 MARK_POP_DISCARD(ctx->lastmark);
1013 RETURN_ON_ERROR(ret);
1014 RETURN_SUCCESS;
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +00001015 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001016 if (ctx->u.rep)
1017 MARK_POP_KEEP(ctx->lastmark);
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001018 LASTMARK_RESTORE();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001019 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001020 if (ctx->u.rep)
1021 MARK_POP_DISCARD(ctx->lastmark);
1022 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001023
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001024 case SRE_OP_REPEAT_ONE:
1025 /* match repeated sequence (maximizing regexp) */
1026
1027 /* this operator only works if the repeated item is
1028 exactly one character wide, and we're not already
1029 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +00001030 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001031
1032 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1033
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001034 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1035 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001036
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +03001037 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001038 RETURN_FAILURE; /* cannot match */
Fredrik Lundhe1869832000-08-01 22:47:49 +00001039
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001040 state->ptr = ctx->ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001041
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001042 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1043 RETURN_ON_ERROR(ret);
1044 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1045 ctx->count = ret;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001046 ctx->ptr += ctx->count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001047
1048 /* when we arrive here, count contains the number of
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001049 matches, and ctx->ptr points to the tail of the target
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001050 string. check if the rest of the pattern matches,
1051 and backtrack if not. */
1052
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001053 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001054 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001055
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001056 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001057 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001058 state->ptr = ctx->ptr;
1059 RETURN_SUCCESS;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001060 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001061
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001062 LASTMARK_SAVE();
1063
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001064 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001065 /* tail starts with a literal. skip positions where
1066 the rest of the pattern cannot possibly match */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001067 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001068 for (;;) {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001069 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001070 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1071 ctx->ptr--;
1072 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001073 }
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001074 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001075 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001076 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001077 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1078 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001079 if (ret) {
1080 RETURN_ON_ERROR(ret);
1081 RETURN_SUCCESS;
1082 }
Tim Peters3d563502006-01-21 02:47:53 +00001083
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001084 LASTMARK_RESTORE();
Tim Peters3d563502006-01-21 02:47:53 +00001085
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001086 ctx->ptr--;
1087 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001088 }
1089
1090 } else {
1091 /* general case */
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001092 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001093 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001094 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1095 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001096 if (ret) {
1097 RETURN_ON_ERROR(ret);
1098 RETURN_SUCCESS;
1099 }
1100 ctx->ptr--;
1101 ctx->count--;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001102 LASTMARK_RESTORE();
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001103 }
1104 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001105 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001106
Guido van Rossum41c99e72003-04-14 17:59:34 +00001107 case SRE_OP_MIN_REPEAT_ONE:
1108 /* match repeated sequence (minimizing regexp) */
1109
1110 /* this operator only works if the repeated item is
1111 exactly one character wide, and we're not already
1112 collecting backtracking points. for other cases,
1113 use the MIN_REPEAT operator */
1114
1115 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1116
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001117 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1118 ctx->pattern[1], ctx->pattern[2]));
Guido van Rossum41c99e72003-04-14 17:59:34 +00001119
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +03001120 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001121 RETURN_FAILURE; /* cannot match */
Guido van Rossum41c99e72003-04-14 17:59:34 +00001122
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001123 state->ptr = ctx->ptr;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001124
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001125 if (ctx->pattern[1] == 0)
1126 ctx->count = 0;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001127 else {
1128 /* count using pattern min as the maximum */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001129 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1130 RETURN_ON_ERROR(ret);
1131 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001132 if (ret < (Py_ssize_t) ctx->pattern[1])
Tim Peters3d563502006-01-21 02:47:53 +00001133 /* didn't match minimum number of times */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001134 RETURN_FAILURE;
1135 /* advance past minimum matches of repeat */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001136 ctx->count = ret;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001137 ctx->ptr += ctx->count;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001138 }
1139
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001140 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Guido van Rossum41c99e72003-04-14 17:59:34 +00001141 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001142 state->ptr = ctx->ptr;
1143 RETURN_SUCCESS;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001144
1145 } else {
1146 /* general case */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001147 LASTMARK_SAVE();
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02001148 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001149 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001150 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001151 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1152 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001153 if (ret) {
1154 RETURN_ON_ERROR(ret);
1155 RETURN_SUCCESS;
1156 }
1157 state->ptr = ctx->ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001158 ret = SRE_COUNT(state, ctx->pattern+3, 1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001159 RETURN_ON_ERROR(ret);
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001160 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001161 if (ret == 0)
Guido van Rossum41c99e72003-04-14 17:59:34 +00001162 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001163 assert(ret == 1);
1164 ctx->ptr++;
1165 ctx->count++;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001166 LASTMARK_RESTORE();
Guido van Rossum41c99e72003-04-14 17:59:34 +00001167 }
Guido van Rossum41c99e72003-04-14 17:59:34 +00001168 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001169 RETURN_FAILURE;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001170
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001171 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001172 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001173 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001174 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001175 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1176 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001177
1178 /* install new repeat context */
Jack Diederich2d400772006-05-27 15:44:34 +00001179 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00001180 if (!ctx->u.rep) {
1181 PyErr_NoMemory();
Neal Norwitzef0de022006-08-12 01:53:28 +00001182 RETURN_FAILURE;
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00001183 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001184 ctx->u.rep->count = -1;
1185 ctx->u.rep->pattern = ctx->pattern;
1186 ctx->u.rep->prev = state->repeat;
1187 ctx->u.rep->last_ptr = NULL;
1188 state->repeat = ctx->u.rep;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001189
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001190 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001191 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001192 state->repeat = ctx->u.rep->prev;
Jack Diederich2d400772006-05-27 15:44:34 +00001193 PyObject_FREE(ctx->u.rep);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001194
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001195 if (ret) {
1196 RETURN_ON_ERROR(ret);
1197 RETURN_SUCCESS;
1198 }
1199 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001200
1201 case SRE_OP_MAX_UNTIL:
1202 /* maximizing repeat */
1203 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1204
1205 /* FIXME: we probably need to deal with zero-width
1206 matches in here... */
1207
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001208 ctx->u.rep = state->repeat;
1209 if (!ctx->u.rep)
1210 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001211
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001212 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001213
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001214 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001215
Serhiy Storchakacf29ba82013-09-05 18:02:57 +03001216 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001217 ctx->ptr, ctx->count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001218
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +03001219 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001220 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001221 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001222 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1223 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001224 if (ret) {
1225 RETURN_ON_ERROR(ret);
1226 RETURN_SUCCESS;
1227 }
1228 ctx->u.rep->count = ctx->count-1;
1229 state->ptr = ctx->ptr;
1230 RETURN_FAILURE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001231 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001232
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +03001233 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02001234 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001235 state->ptr != ctx->u.rep->last_ptr) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001236 /* we may have enough matches, but if we can
1237 match another item, do so */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001238 ctx->u.rep->count = ctx->count;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001239 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001240 MARK_PUSH(ctx->lastmark);
1241 /* zero-width match protection */
1242 DATA_PUSH(&ctx->u.rep->last_ptr);
1243 ctx->u.rep->last_ptr = state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001244 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1245 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001246 DATA_POP(&ctx->u.rep->last_ptr);
1247 if (ret) {
1248 MARK_POP_DISCARD(ctx->lastmark);
1249 RETURN_ON_ERROR(ret);
1250 RETURN_SUCCESS;
1251 }
1252 MARK_POP(ctx->lastmark);
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001253 LASTMARK_RESTORE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001254 ctx->u.rep->count = ctx->count-1;
1255 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001256 }
1257
1258 /* cannot match more repeated items here. make sure the
1259 tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001260 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001261 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001262 RETURN_ON_SUCCESS(ret);
1263 state->repeat = ctx->u.rep;
1264 state->ptr = ctx->ptr;
1265 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001266
1267 case SRE_OP_MIN_UNTIL:
1268 /* minimizing repeat */
1269 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1270
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001271 ctx->u.rep = state->repeat;
1272 if (!ctx->u.rep)
1273 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001274
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001275 state->ptr = ctx->ptr;
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001276
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001277 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001278
Serhiy Storchakacf29ba82013-09-05 18:02:57 +03001279 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001280 ctx->ptr, ctx->count, ctx->u.rep->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001281
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +03001282 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001283 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001284 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001285 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1286 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001287 if (ret) {
1288 RETURN_ON_ERROR(ret);
1289 RETURN_SUCCESS;
1290 }
1291 ctx->u.rep->count = ctx->count-1;
1292 state->ptr = ctx->ptr;
1293 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001294 }
1295
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001296 LASTMARK_SAVE();
1297
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001298 /* see if the tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001299 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001300 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001301 if (ret) {
1302 RETURN_ON_ERROR(ret);
1303 RETURN_SUCCESS;
1304 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001305
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001306 state->repeat = ctx->u.rep;
1307 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001308
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001309 LASTMARK_RESTORE();
1310
Serhiy Storchaka3ade66c2013-08-03 19:26:33 +03001311 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +02001312 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1313 state->ptr == ctx->u.rep->last_ptr)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001314 RETURN_FAILURE;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001315
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001316 ctx->u.rep->count = ctx->count;
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +02001317 /* zero-width match protection */
1318 DATA_PUSH(&ctx->u.rep->last_ptr);
1319 ctx->u.rep->last_ptr = state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001320 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1321 ctx->u.rep->pattern+3);
Serhiy Storchaka6a8e2b42013-02-16 21:23:01 +02001322 DATA_POP(&ctx->u.rep->last_ptr);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001323 if (ret) {
1324 RETURN_ON_ERROR(ret);
1325 RETURN_SUCCESS;
1326 }
1327 ctx->u.rep->count = ctx->count-1;
1328 state->ptr = ctx->ptr;
1329 RETURN_FAILURE;
1330
1331 case SRE_OP_GROUPREF:
1332 /* match backreference */
1333 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1334 ctx->ptr, ctx->pattern[0]));
1335 i = ctx->pattern[0];
1336 {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001337 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001338 if (groupref >= state->lastmark) {
1339 RETURN_FAILURE;
1340 } else {
1341 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1342 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1343 if (!p || !e || e < p)
1344 RETURN_FAILURE;
1345 while (p < e) {
1346 if (ctx->ptr >= end || *ctx->ptr != *p)
1347 RETURN_FAILURE;
1348 p++; ctx->ptr++;
1349 }
1350 }
1351 }
1352 ctx->pattern++;
1353 break;
1354
1355 case SRE_OP_GROUPREF_IGNORE:
1356 /* match backreference */
1357 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1358 ctx->ptr, ctx->pattern[0]));
1359 i = ctx->pattern[0];
1360 {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001361 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001362 if (groupref >= state->lastmark) {
1363 RETURN_FAILURE;
1364 } else {
1365 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1366 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1367 if (!p || !e || e < p)
1368 RETURN_FAILURE;
1369 while (p < e) {
1370 if (ctx->ptr >= end ||
1371 state->lower(*ctx->ptr) != state->lower(*p))
1372 RETURN_FAILURE;
1373 p++; ctx->ptr++;
1374 }
1375 }
1376 }
1377 ctx->pattern++;
1378 break;
1379
1380 case SRE_OP_GROUPREF_EXISTS:
1381 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1382 ctx->ptr, ctx->pattern[0]));
1383 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1384 i = ctx->pattern[0];
1385 {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001386 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001387 if (groupref >= state->lastmark) {
1388 ctx->pattern += ctx->pattern[1];
1389 break;
1390 } else {
1391 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1392 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1393 if (!p || !e || e < p) {
1394 ctx->pattern += ctx->pattern[1];
1395 break;
1396 }
1397 }
1398 }
1399 ctx->pattern += 2;
1400 break;
1401
1402 case SRE_OP_ASSERT:
1403 /* assert subpattern */
1404 /* <ASSERT> <skip> <back> <pattern> */
1405 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1406 ctx->ptr, ctx->pattern[1]));
1407 state->ptr = ctx->ptr - ctx->pattern[1];
1408 if (state->ptr < state->beginning)
1409 RETURN_FAILURE;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001410 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001411 RETURN_ON_FAILURE(ret);
1412 ctx->pattern += ctx->pattern[0];
1413 break;
1414
1415 case SRE_OP_ASSERT_NOT:
1416 /* assert not subpattern */
1417 /* <ASSERT_NOT> <skip> <back> <pattern> */
1418 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1419 ctx->ptr, ctx->pattern[1]));
1420 state->ptr = ctx->ptr - ctx->pattern[1];
1421 if (state->ptr >= state->beginning) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001422 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001423 if (ret) {
1424 RETURN_ON_ERROR(ret);
1425 RETURN_FAILURE;
1426 }
1427 }
1428 ctx->pattern += ctx->pattern[0];
1429 break;
1430
1431 case SRE_OP_FAILURE:
1432 /* immediate failure */
1433 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1434 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001435
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001436 default:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001437 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1438 ctx->pattern[-1]));
1439 RETURN_ERROR(SRE_ERROR_ILLEGAL);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001440 }
1441 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001442
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001443exit:
1444 ctx_pos = ctx->last_ctx_pos;
1445 jump = ctx->jump;
1446 DATA_POP_DISCARD(ctx);
1447 if (ctx_pos == -1)
1448 return ret;
1449 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1450
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001451 switch (jump) {
1452 case JUMP_MAX_UNTIL_2:
1453 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1454 goto jump_max_until_2;
1455 case JUMP_MAX_UNTIL_3:
1456 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1457 goto jump_max_until_3;
1458 case JUMP_MIN_UNTIL_2:
1459 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1460 goto jump_min_until_2;
1461 case JUMP_MIN_UNTIL_3:
1462 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1463 goto jump_min_until_3;
1464 case JUMP_BRANCH:
1465 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1466 goto jump_branch;
1467 case JUMP_MAX_UNTIL_1:
1468 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1469 goto jump_max_until_1;
1470 case JUMP_MIN_UNTIL_1:
1471 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1472 goto jump_min_until_1;
1473 case JUMP_REPEAT:
1474 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1475 goto jump_repeat;
1476 case JUMP_REPEAT_ONE_1:
1477 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1478 goto jump_repeat_one_1;
1479 case JUMP_REPEAT_ONE_2:
1480 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1481 goto jump_repeat_one_2;
1482 case JUMP_MIN_REPEAT_ONE:
1483 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1484 goto jump_min_repeat_one;
1485 case JUMP_ASSERT:
1486 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1487 goto jump_assert;
1488 case JUMP_ASSERT_NOT:
1489 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1490 goto jump_assert_not;
1491 case JUMP_NONE:
Serhiy Storchakacf29ba82013-09-05 18:02:57 +03001492 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1493 ctx->ptr, ret));
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001494 break;
1495 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001496
1497 return ret; /* should never get here */
Guido van Rossumb700df92000-03-31 14:59:30 +00001498}
1499
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001500LOCAL(Py_ssize_t)
Guido van Rossumb700df92000-03-31 14:59:30 +00001501SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1502{
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001503 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1504 SRE_CHAR* end = (SRE_CHAR *)state->end;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001505 Py_ssize_t status = 0;
1506 Py_ssize_t prefix_len = 0;
1507 Py_ssize_t prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001508 SRE_CODE* prefix = NULL;
1509 SRE_CODE* charset = NULL;
1510 SRE_CODE* overlap = NULL;
1511 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001512
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001513 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001514 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001515 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001516
1517 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001518
Gustavo Niemeyer28b5bb32003-06-26 14:41:08 +00001519 if (pattern[3] > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001520 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001521 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001522 end -= pattern[3]-1;
1523 if (end <= ptr)
1524 end = ptr+1;
1525 }
1526
Fredrik Lundh3562f112000-07-02 12:00:07 +00001527 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001528 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001529 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001530 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001531 prefix_skip = pattern[6];
1532 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001533 overlap = prefix + prefix_len - 1;
1534 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001535 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001536 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001537 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001538
1539 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001540 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001541
Serhiy Storchakacf29ba82013-09-05 18:02:57 +03001542 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1543 prefix, prefix_len, prefix_skip));
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001544 TRACE(("charset = %p\n", charset));
1545
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001546#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001547 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001548 /* pattern starts with a known prefix. use the overlap
1549 table to skip forward as fast as we possibly can */
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001550 Py_ssize_t i = 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001551 end = (SRE_CHAR *)state->end;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001552 while (ptr < end) {
1553 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001554 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001555 if (!i)
1556 break;
1557 else
1558 i = overlap[i];
1559 } else {
1560 if (++i == prefix_len) {
1561 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001562 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1563 state->start = ptr + 1 - prefix_len;
1564 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001565 if (flags & SRE_INFO_LITERAL)
1566 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001567 status = SRE_MATCH(state, pattern + 2*prefix_skip);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001568 if (status != 0)
1569 return status;
1570 /* close but no cigar -- try again */
1571 i = overlap[i];
1572 }
1573 break;
1574 }
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001575 }
1576 ptr++;
1577 }
1578 return 0;
1579 }
1580#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001581
Fredrik Lundh3562f112000-07-02 12:00:07 +00001582 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001583 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001584 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001585 SRE_CODE chr = pattern[1];
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001586 end = (SRE_CHAR *)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001587 for (;;) {
1588 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1589 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001590 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001591 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001592 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001593 state->start = ptr;
1594 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001595 if (flags & SRE_INFO_LITERAL)
1596 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001597 status = SRE_MATCH(state, pattern + 2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001598 if (status != 0)
1599 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001600 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001601 } else if (charset) {
1602 /* pattern starts with a character from a known set */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001603 end = (SRE_CHAR *)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001604 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001605 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001606 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001607 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001608 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001609 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001610 state->start = ptr;
1611 state->ptr = ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001612 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001613 if (status != 0)
1614 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001615 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001616 }
1617 } else
1618 /* general case */
1619 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001620 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001621 state->start = state->ptr = ptr++;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001622 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001623 if (status != 0)
1624 break;
1625 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001626
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001627 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001628}
Tim Peters3d563502006-01-21 02:47:53 +00001629
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001630LOCAL(int)
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001631SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001632{
1633 /* check if given string is a literal template (i.e. no escapes) */
1634 while (len-- > 0)
1635 if (*ptr++ == '\\')
1636 return 0;
1637 return 1;
1638}
Guido van Rossumb700df92000-03-31 14:59:30 +00001639
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001640#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001641
1642/* -------------------------------------------------------------------- */
1643/* factories and destructors */
1644
1645/* see sre.h for object declarations */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001646static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1647static PyObject*pattern_scanner(PatternObject*, PyObject*);
Guido van Rossumb700df92000-03-31 14:59:30 +00001648
1649static PyObject *
Georg Brandl964f5972006-05-28 22:38:57 +00001650sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +00001651{
Benjamin Peterson9dccb012013-01-10 10:37:47 -06001652 return PyInt_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001653}
1654
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001655static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001656sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001657{
1658 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001659 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001660 return NULL;
1661 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001662 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001663 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001664#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001665 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001666#else
1667 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001668#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001669 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001670}
1671
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001672LOCAL(void)
1673state_reset(SRE_STATE* state)
1674{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001675 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001676 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001677
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001678 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001679 state->lastindex = -1;
1680
1681 state->repeat = NULL;
1682
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001683 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001684}
1685
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001686static void*
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001687getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001688{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001689 /* given a python object, return a data pointer, a length (in
1690 characters), and a character size. return NULL if the object
1691 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +00001692
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 PyBufferProcs *buffer;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001694 Py_ssize_t size, bytes;
1695 int charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001696 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001697
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001698#if defined(HAVE_UNICODE)
1699 if (PyUnicode_Check(string)) {
1700 /* unicode strings doesn't always support the buffer interface */
1701 ptr = (void*) PyUnicode_AS_DATA(string);
Brett Cannon8ffe7bb2010-05-03 23:51:28 +00001702 /* bytes = PyUnicode_GET_DATA_SIZE(string); */
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001703 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001704 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001705
1706 } else {
1707#endif
1708
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001709 /* get pointer to string buffer */
Christian Heimese93237d2007-12-19 02:37:44 +00001710 buffer = Py_TYPE(string)->tp_as_buffer;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001711 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1712 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001713 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001714 return NULL;
1715 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001716
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001717 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001718 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1719 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001720 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1721 return NULL;
1722 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001723
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001724 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001725#if PY_VERSION_HEX >= 0x01060000
1726 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001727#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001728 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001729#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001730
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001731 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001732 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001733#if defined(HAVE_UNICODE)
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001734 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001735 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001736#endif
1737 else {
1738 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1739 return NULL;
1740 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001741
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001742#if defined(HAVE_UNICODE)
1743 }
1744#endif
1745
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001746 *p_length = size;
1747 *p_charsize = charsize;
1748
1749 return ptr;
1750}
1751
1752LOCAL(PyObject*)
1753state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001754 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001755{
1756 /* prepare state object */
1757
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001758 Py_ssize_t length;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001759 int charsize;
1760 void* ptr;
1761
1762 memset(state, 0, sizeof(SRE_STATE));
1763
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001764 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001765 state->lastindex = -1;
1766
1767 ptr = getstring(string, &length, &charsize);
1768 if (!ptr)
1769 return NULL;
1770
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001771 /* adjust boundaries */
1772 if (start < 0)
1773 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001774 else if (start > length)
1775 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001776
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001777 if (end < 0)
1778 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001779 else if (end > length)
1780 end = length;
1781
1782 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001783
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001784 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001785
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001786 state->start = (void*) ((char*) ptr + start * state->charsize);
1787 state->end = (void*) ((char*) ptr + end * state->charsize);
1788
1789 Py_INCREF(string);
1790 state->string = string;
1791 state->pos = start;
1792 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001793
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001794 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001795 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001796 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001797#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001798 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001799#else
1800 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001801#endif
1802 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001803 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001804
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001805 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001806}
1807
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001808LOCAL(void)
1809state_fini(SRE_STATE* state)
1810{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001811 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001812 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001813}
1814
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001815/* calculate offset from start of string */
1816#define STATE_OFFSET(state, member)\
1817 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1818
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001819LOCAL(PyObject*)
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001820state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001821{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001822 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +00001823
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001824 index = (index - 1) * 2;
1825
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001826 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001827 if (empty)
1828 /* want empty string */
1829 i = j = 0;
1830 else {
1831 Py_INCREF(Py_None);
1832 return Py_None;
1833 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001834 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001835 i = STATE_OFFSET(state, state->mark[index]);
1836 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001837 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001838
Fredrik Lundh58100642000-08-09 09:14:35 +00001839 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001840}
1841
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001842static void
1843pattern_error(int status)
1844{
1845 switch (status) {
1846 case SRE_ERROR_RECURSION_LIMIT:
1847 PyErr_SetString(
1848 PyExc_RuntimeError,
1849 "maximum recursion limit exceeded"
1850 );
1851 break;
1852 case SRE_ERROR_MEMORY:
1853 PyErr_NoMemory();
1854 break;
Facundo Batista4473d222008-01-08 21:10:12 +00001855 case SRE_ERROR_INTERRUPTED:
1856 /* An exception has already been raised, so let it fly */
1857 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001858 default:
1859 /* other error codes indicate compiler/engine bugs */
1860 PyErr_SetString(
1861 PyExc_RuntimeError,
1862 "internal error in regular expression engine"
1863 );
1864 }
1865}
1866
Guido van Rossumb700df92000-03-31 14:59:30 +00001867static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001868pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001869{
Raymond Hettinger027bb632004-05-31 03:09:25 +00001870 if (self->weakreflist != NULL)
1871 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001872 Py_XDECREF(self->pattern);
1873 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001874 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001875 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001876}
1877
1878static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001879pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001880{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001881 SRE_STATE state;
1882 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001883
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001884 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001885 Py_ssize_t start = 0;
1886 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001887 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001888 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001889 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001890 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001891
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001892 string = state_init(&state, self, string, start, end);
1893 if (!string)
1894 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001895
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001896 state.ptr = state.start;
1897
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001898 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1899
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001900 if (state.charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001901 status = sre_match(&state, PatternObject_GetCode(self));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001902 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001903#if defined(HAVE_UNICODE)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001904 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001905#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001906 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001907
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001908 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00001909 if (PyErr_Occurred())
1910 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001911
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001912 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001913
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001914 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001915}
1916
1917static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001918pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001919{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001920 SRE_STATE state;
1921 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001922
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001923 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001924 Py_ssize_t start = 0;
1925 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001926 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001927 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001928 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001929 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001930
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001931 string = state_init(&state, self, string, start, end);
1932 if (!string)
1933 return NULL;
1934
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001935 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1936
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001937 if (state.charsize == 1) {
1938 status = sre_search(&state, PatternObject_GetCode(self));
1939 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001940#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001941 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001942#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001943 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001944
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001945 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1946
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001947 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001948
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00001949 if (PyErr_Occurred())
1950 return NULL;
1951
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001952 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001953}
1954
1955static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001956call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001957{
1958 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001959 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001960 PyObject* func;
1961 PyObject* result;
1962
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001963 if (!args)
1964 return NULL;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001965 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001966 if (!name)
1967 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001968 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001969 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001970 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001971 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001972 func = PyObject_GetAttrString(mod, function);
1973 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001974 if (!func)
1975 return NULL;
1976 result = PyObject_CallObject(func, args);
1977 Py_DECREF(func);
1978 Py_DECREF(args);
1979 return result;
1980}
1981
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001982#ifdef USE_BUILTIN_COPY
1983static int
1984deepcopy(PyObject** object, PyObject* memo)
1985{
1986 PyObject* copy;
1987
1988 copy = call(
1989 "copy", "deepcopy",
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001990 PyTuple_Pack(2, *object, memo)
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001991 );
1992 if (!copy)
1993 return 0;
1994
1995 Py_DECREF(*object);
1996 *object = copy;
1997
1998 return 1; /* success */
1999}
2000#endif
2001
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002002static PyObject*
Guido van Rossum1ff91d92007-09-10 22:02:25 +00002003join_list(PyObject* list, PyObject* string)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002004{
2005 /* join list elements */
2006
2007 PyObject* joiner;
2008#if PY_VERSION_HEX >= 0x01060000
2009 PyObject* function;
2010 PyObject* args;
2011#endif
2012 PyObject* result;
2013
Guido van Rossum1ff91d92007-09-10 22:02:25 +00002014 joiner = PySequence_GetSlice(string, 0, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002015 if (!joiner)
2016 return NULL;
2017
Guido van Rossum1ff91d92007-09-10 22:02:25 +00002018 if (PyList_GET_SIZE(list) == 0) {
2019 Py_DECREF(list);
2020 return joiner;
2021 }
2022
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002023#if PY_VERSION_HEX >= 0x01060000
2024 function = PyObject_GetAttrString(joiner, "join");
2025 if (!function) {
2026 Py_DECREF(joiner);
2027 return NULL;
2028 }
2029 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002030 if (!args) {
2031 Py_DECREF(function);
2032 Py_DECREF(joiner);
2033 return NULL;
2034 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002035 PyTuple_SET_ITEM(args, 0, list);
2036 result = PyObject_CallObject(function, args);
2037 Py_DECREF(args); /* also removes list */
2038 Py_DECREF(function);
2039#else
2040 result = call(
2041 "string", "join",
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002042 PyTuple_Pack(2, list, joiner)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002043 );
2044#endif
2045 Py_DECREF(joiner);
2046
2047 return result;
2048}
2049
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002050static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002051pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002052{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002053 SRE_STATE state;
2054 PyObject* list;
2055 int status;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002056 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00002057
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002059 Py_ssize_t start = 0;
2060 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002061 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002062 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00002063 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002065
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002066 string = state_init(&state, self, string, start, end);
2067 if (!string)
2068 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002069
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002070 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002071 if (!list) {
2072 state_fini(&state);
2073 return NULL;
2074 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002075
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002076 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002077
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002078 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +00002079
Fredrik Lundhebc37b22000-10-28 19:30:41 +00002080 state_reset(&state);
2081
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 state.ptr = state.start;
2083
2084 if (state.charsize == 1) {
2085 status = sre_search(&state, PatternObject_GetCode(self));
2086 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002087#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002088 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002089#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002090 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002091
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00002092 if (PyErr_Occurred())
2093 goto error;
2094
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002095 if (status <= 0) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002096 if (status == 0)
2097 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00002098 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002099 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002100 }
Tim Peters3d563502006-01-21 02:47:53 +00002101
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002102 /* don't bother to build a match object */
2103 switch (self->groups) {
2104 case 0:
2105 b = STATE_OFFSET(&state, state.start);
2106 e = STATE_OFFSET(&state, state.ptr);
2107 item = PySequence_GetSlice(string, b, e);
2108 if (!item)
2109 goto error;
2110 break;
2111 case 1:
2112 item = state_getslice(&state, 1, string, 1);
2113 if (!item)
2114 goto error;
2115 break;
2116 default:
2117 item = PyTuple_New(self->groups);
2118 if (!item)
2119 goto error;
2120 for (i = 0; i < self->groups; i++) {
2121 PyObject* o = state_getslice(&state, i+1, string, 1);
2122 if (!o) {
2123 Py_DECREF(item);
2124 goto error;
2125 }
2126 PyTuple_SET_ITEM(item, i, o);
2127 }
2128 break;
2129 }
2130
2131 status = PyList_Append(list, item);
2132 Py_DECREF(item);
2133 if (status < 0)
2134 goto error;
2135
2136 if (state.ptr == state.start)
2137 state.start = (void*) ((char*) state.ptr + state.charsize);
2138 else
2139 state.start = state.ptr;
2140
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002141 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002142
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002143 state_fini(&state);
2144 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00002145
2146error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002147 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002148 state_fini(&state);
2149 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002150
Guido van Rossumb700df92000-03-31 14:59:30 +00002151}
2152
Fredrik Lundh703ce812001-10-24 22:16:30 +00002153#if PY_VERSION_HEX >= 0x02020000
2154static PyObject*
2155pattern_finditer(PatternObject* pattern, PyObject* args)
2156{
2157 PyObject* scanner;
2158 PyObject* search;
2159 PyObject* iterator;
2160
2161 scanner = pattern_scanner(pattern, args);
2162 if (!scanner)
2163 return NULL;
2164
2165 search = PyObject_GetAttrString(scanner, "search");
2166 Py_DECREF(scanner);
2167 if (!search)
2168 return NULL;
2169
2170 iterator = PyCallIter_New(search, Py_None);
2171 Py_DECREF(search);
2172
2173 return iterator;
2174}
2175#endif
2176
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002177static PyObject*
2178pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2179{
2180 SRE_STATE state;
2181 PyObject* list;
2182 PyObject* item;
2183 int status;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002184 Py_ssize_t n;
2185 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002186 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002187
2188 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002189 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002190 static char* kwlist[] = { "source", "maxsplit", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002191 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002192 &string, &maxsplit))
2193 return NULL;
2194
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002195 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002196 if (!string)
2197 return NULL;
2198
2199 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002200 if (!list) {
2201 state_fini(&state);
2202 return NULL;
2203 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002204
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002205 n = 0;
2206 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002207
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002208 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002209
2210 state_reset(&state);
2211
2212 state.ptr = state.start;
2213
2214 if (state.charsize == 1) {
2215 status = sre_search(&state, PatternObject_GetCode(self));
2216 } else {
2217#if defined(HAVE_UNICODE)
2218 status = sre_usearch(&state, PatternObject_GetCode(self));
2219#endif
2220 }
2221
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00002222 if (PyErr_Occurred())
2223 goto error;
2224
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002225 if (status <= 0) {
2226 if (status == 0)
2227 break;
2228 pattern_error(status);
2229 goto error;
2230 }
Tim Peters3d563502006-01-21 02:47:53 +00002231
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002232 if (state.start == state.ptr) {
2233 if (last == state.end)
2234 break;
2235 /* skip one character */
2236 state.start = (void*) ((char*) state.ptr + state.charsize);
2237 continue;
2238 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002239
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002240 /* get segment before this match */
2241 item = PySequence_GetSlice(
2242 string, STATE_OFFSET(&state, last),
2243 STATE_OFFSET(&state, state.start)
2244 );
2245 if (!item)
2246 goto error;
2247 status = PyList_Append(list, item);
2248 Py_DECREF(item);
2249 if (status < 0)
2250 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002251
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002252 /* add groups (if any) */
2253 for (i = 0; i < self->groups; i++) {
2254 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002255 if (!item)
2256 goto error;
2257 status = PyList_Append(list, item);
2258 Py_DECREF(item);
2259 if (status < 0)
2260 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002261 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002262
2263 n = n + 1;
2264
2265 last = state.start = state.ptr;
2266
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002267 }
2268
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002269 /* get segment following last match (even if empty) */
2270 item = PySequence_GetSlice(
2271 string, STATE_OFFSET(&state, last), state.endpos
2272 );
2273 if (!item)
2274 goto error;
2275 status = PyList_Append(list, item);
2276 Py_DECREF(item);
2277 if (status < 0)
2278 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002279
2280 state_fini(&state);
2281 return list;
2282
2283error:
2284 Py_DECREF(list);
2285 state_fini(&state);
2286 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002287
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002288}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002289
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002290static PyObject*
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002291pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002292 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002293{
2294 SRE_STATE state;
2295 PyObject* list;
2296 PyObject* item;
2297 PyObject* filter;
2298 PyObject* args;
2299 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002300 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002301 int status;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002302 Py_ssize_t n;
2303 Py_ssize_t i, b, e;
2304 int bint;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002305 int filter_is_callable;
2306
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002307 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002308 /* sub/subn takes either a function or a template */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002309 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002310 Py_INCREF(filter);
2311 filter_is_callable = 1;
2312 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002313 /* if not callable, check if it's a literal string */
2314 int literal;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002315 ptr = getstring(ptemplate, &n, &bint);
2316 b = bint;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002317 if (ptr) {
2318 if (b == 1) {
Skip Montanaro816a1622006-04-18 11:53:09 +00002319 literal = sre_literal_template((unsigned char *)ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002320 } else {
2321#if defined(HAVE_UNICODE)
Skip Montanaro816a1622006-04-18 11:53:09 +00002322 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002323#endif
2324 }
2325 } else {
2326 PyErr_Clear();
2327 literal = 0;
2328 }
2329 if (literal) {
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002330 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002331 Py_INCREF(filter);
2332 filter_is_callable = 0;
2333 } else {
2334 /* not a literal; hand it over to the template compiler */
2335 filter = call(
Neal Norwitz94a9c092006-03-16 06:30:02 +00002336 SRE_PY_MODULE, "_subx",
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002337 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002338 );
2339 if (!filter)
2340 return NULL;
2341 filter_is_callable = PyCallable_Check(filter);
2342 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002343 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002344
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002345 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002346 if (!string) {
2347 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002348 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002349 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002350
2351 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002352 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002353 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002354 state_fini(&state);
2355 return NULL;
2356 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002357
2358 n = i = 0;
2359
2360 while (!count || n < count) {
2361
2362 state_reset(&state);
2363
2364 state.ptr = state.start;
2365
2366 if (state.charsize == 1) {
2367 status = sre_search(&state, PatternObject_GetCode(self));
2368 } else {
2369#if defined(HAVE_UNICODE)
2370 status = sre_usearch(&state, PatternObject_GetCode(self));
2371#endif
2372 }
2373
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00002374 if (PyErr_Occurred())
2375 goto error;
2376
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002377 if (status <= 0) {
2378 if (status == 0)
2379 break;
2380 pattern_error(status);
2381 goto error;
2382 }
Tim Peters3d563502006-01-21 02:47:53 +00002383
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002384 b = STATE_OFFSET(&state, state.start);
2385 e = STATE_OFFSET(&state, state.ptr);
2386
2387 if (i < b) {
2388 /* get segment before this match */
2389 item = PySequence_GetSlice(string, i, b);
2390 if (!item)
2391 goto error;
2392 status = PyList_Append(list, item);
2393 Py_DECREF(item);
2394 if (status < 0)
2395 goto error;
2396
2397 } else if (i == b && i == e && n > 0)
2398 /* ignore empty match on latest position */
2399 goto next;
2400
2401 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002402 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002403 match = pattern_new_match(self, &state, 1);
2404 if (!match)
2405 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002406 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002407 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002408 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002409 goto error;
2410 }
2411 item = PyObject_CallObject(filter, args);
2412 Py_DECREF(args);
2413 Py_DECREF(match);
2414 if (!item)
2415 goto error;
2416 } else {
2417 /* filter is literal string */
2418 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002419 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002420 }
2421
2422 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002423 if (item != Py_None) {
2424 status = PyList_Append(list, item);
2425 Py_DECREF(item);
2426 if (status < 0)
2427 goto error;
2428 }
Tim Peters3d563502006-01-21 02:47:53 +00002429
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002430 i = e;
2431 n = n + 1;
2432
2433next:
2434 /* move on */
2435 if (state.ptr == state.start)
2436 state.start = (void*) ((char*) state.ptr + state.charsize);
2437 else
2438 state.start = state.ptr;
2439
2440 }
2441
2442 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002443 if (i < state.endpos) {
2444 item = PySequence_GetSlice(string, i, state.endpos);
2445 if (!item)
2446 goto error;
2447 status = PyList_Append(list, item);
2448 Py_DECREF(item);
2449 if (status < 0)
2450 goto error;
2451 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002452
2453 state_fini(&state);
2454
Guido van Rossum4e173842001-12-07 04:25:10 +00002455 Py_DECREF(filter);
2456
Fredrik Lundhdac58492001-10-21 21:48:30 +00002457 /* convert list to single string (also removes list) */
Guido van Rossum1ff91d92007-09-10 22:02:25 +00002458 item = join_list(list, string);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002459
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002460 if (!item)
2461 return NULL;
2462
2463 if (subn)
Antoine Pitroub83575b2012-12-02 12:52:36 +01002464 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002465
2466 return item;
2467
2468error:
2469 Py_DECREF(list);
2470 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002471 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002472 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002473
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002474}
2475
2476static PyObject*
2477pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2478{
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002479 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002480 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002481 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002482 static char* kwlist[] = { "repl", "string", "count", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002483 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002484 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002485 return NULL;
2486
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002487 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002488}
2489
2490static PyObject*
2491pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2492{
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002493 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002494 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002495 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002496 static char* kwlist[] = { "repl", "string", "count", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002497 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002498 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002499 return NULL;
2500
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002501 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002502}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002503
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002504static PyObject*
Georg Brandl964f5972006-05-28 22:38:57 +00002505pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002506{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002507#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002508 PatternObject* copy;
2509 int offset;
2510
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002511 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2512 if (!copy)
2513 return NULL;
2514
2515 offset = offsetof(PatternObject, groups);
2516
2517 Py_XINCREF(self->groupindex);
2518 Py_XINCREF(self->indexgroup);
2519 Py_XINCREF(self->pattern);
2520
2521 memcpy((char*) copy + offset, (char*) self + offset,
2522 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00002523 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002524
2525 return (PyObject*) copy;
2526#else
2527 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2528 return NULL;
2529#endif
2530}
2531
2532static PyObject*
Georg Brandlfbef5882006-05-28 22:14:04 +00002533pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002534{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002535#ifdef USE_BUILTIN_COPY
2536 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002537
Georg Brandlfbef5882006-05-28 22:14:04 +00002538 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002539 if (!copy)
2540 return NULL;
2541
2542 if (!deepcopy(&copy->groupindex, memo) ||
2543 !deepcopy(&copy->indexgroup, memo) ||
2544 !deepcopy(&copy->pattern, memo)) {
2545 Py_DECREF(copy);
2546 return NULL;
2547 }
2548
2549#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002550 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2551 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002552#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002553}
2554
Raymond Hettinger94478742004-09-24 04:31:19 +00002555PyDoc_STRVAR(pattern_match_doc,
2556"match(string[, pos[, endpos]]) --> match object or None.\n\
2557 Matches zero or more characters at the beginning of the string");
2558
2559PyDoc_STRVAR(pattern_search_doc,
2560"search(string[, pos[, endpos]]) --> match object or None.\n\
2561 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlovc08ded92012-12-25 18:50:03 +02002562 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002563
2564PyDoc_STRVAR(pattern_split_doc,
2565"split(string[, maxsplit = 0]) --> list.\n\
2566 Split string by the occurrences of pattern.");
2567
2568PyDoc_STRVAR(pattern_findall_doc,
2569"findall(string[, pos[, endpos]]) --> list.\n\
2570 Return a list of all non-overlapping matches of pattern in string.");
2571
2572PyDoc_STRVAR(pattern_finditer_doc,
2573"finditer(string[, pos[, endpos]]) --> iterator.\n\
2574 Return an iterator over all non-overlapping matches for the \n\
2575 RE pattern in string. For each match, the iterator returns a\n\
2576 match object.");
2577
2578PyDoc_STRVAR(pattern_sub_doc,
2579"sub(repl, string[, count = 0]) --> newstring\n\
2580 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00002581 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002582
2583PyDoc_STRVAR(pattern_subn_doc,
2584"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2585 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2586 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00002587 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002588
2589PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2590
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002591static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00002592 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002593 pattern_match_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002594 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002595 pattern_search_doc},
2596 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
2597 pattern_sub_doc},
2598 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
2599 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002600 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002601 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002602 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002603 pattern_findall_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002604#if PY_VERSION_HEX >= 0x02020000
Raymond Hettinger94478742004-09-24 04:31:19 +00002605 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
2606 pattern_finditer_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002607#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002608 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Georg Brandlfbef5882006-05-28 22:14:04 +00002609 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2610 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002611 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002612};
2613
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05002614#define PAT_OFF(x) offsetof(PatternObject, x)
2615static PyMemberDef pattern_members[] = {
2616 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2617 {"flags", T_INT, PAT_OFF(flags), READONLY},
2618 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2619 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
2620 {NULL} /* Sentinel */
2621};
Guido van Rossumb700df92000-03-31 14:59:30 +00002622
2623statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002624 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002625 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002626 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002627 (destructor)pattern_dealloc, /*tp_dealloc*/
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05002628 0, /* tp_print */
2629 0, /* tp_getattrn */
Raymond Hettinger027bb632004-05-31 03:09:25 +00002630 0, /* tp_setattr */
2631 0, /* tp_compare */
2632 0, /* tp_repr */
2633 0, /* tp_as_number */
2634 0, /* tp_as_sequence */
2635 0, /* tp_as_mapping */
2636 0, /* tp_hash */
2637 0, /* tp_call */
2638 0, /* tp_str */
2639 0, /* tp_getattro */
2640 0, /* tp_setattro */
2641 0, /* tp_as_buffer */
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05002642 Py_TPFLAGS_DEFAULT, /* tp_flags */
Raymond Hettinger94478742004-09-24 04:31:19 +00002643 pattern_doc, /* tp_doc */
Raymond Hettinger027bb632004-05-31 03:09:25 +00002644 0, /* tp_traverse */
2645 0, /* tp_clear */
2646 0, /* tp_richcompare */
2647 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05002648 0, /* tp_iter */
2649 0, /* tp_iternext */
2650 pattern_methods, /* tp_methods */
2651 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00002652};
2653
Guido van Rossum8b762f02008-08-05 03:39:21 +00002654static int _validate(PatternObject *self); /* Forward */
2655
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002656static PyObject *
2657_compile(PyObject* self_, PyObject* args)
2658{
2659 /* "compile" pattern descriptor to pattern object */
2660
2661 PatternObject* self;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002662 Py_ssize_t i, n;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002663
2664 PyObject* pattern;
2665 int flags = 0;
2666 PyObject* code;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002667 Py_ssize_t groups = 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002668 PyObject* groupindex = NULL;
2669 PyObject* indexgroup = NULL;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002670 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002671 &PyList_Type, &code, &groups,
2672 &groupindex, &indexgroup))
2673 return NULL;
2674
2675 n = PyList_GET_SIZE(code);
Christian Heimes4956d2b2008-01-18 19:12:56 +00002676 /* coverity[ampersand_in_size] */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002677 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2678 if (!self)
2679 return NULL;
Antoine Pitrouefdddd32010-01-14 17:25:24 +00002680 self->weakreflist = NULL;
2681 self->pattern = NULL;
2682 self->groupindex = NULL;
2683 self->indexgroup = NULL;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002684
2685 self->codesize = n;
2686
2687 for (i = 0; i < n; i++) {
2688 PyObject *o = PyList_GET_ITEM(code, i);
2689 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
2690 : PyLong_AsUnsignedLong(o);
Antoine Pitroub83ea142012-11-20 22:30:42 +01002691 if (value == (unsigned long)-1 && PyErr_Occurred()) {
2692 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
2693 PyErr_SetString(PyExc_OverflowError,
2694 "regular expression code size limit exceeded");
2695 }
2696 break;
2697 }
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002698 self->code[i] = (SRE_CODE) value;
2699 if ((unsigned long) self->code[i] != value) {
2700 PyErr_SetString(PyExc_OverflowError,
2701 "regular expression code size limit exceeded");
2702 break;
2703 }
2704 }
2705
2706 if (PyErr_Occurred()) {
Antoine Pitrouefdddd32010-01-14 17:25:24 +00002707 Py_DECREF(self);
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002708 return NULL;
2709 }
2710
2711 Py_INCREF(pattern);
2712 self->pattern = pattern;
2713
2714 self->flags = flags;
2715
2716 self->groups = groups;
2717
2718 Py_XINCREF(groupindex);
2719 self->groupindex = groupindex;
2720
2721 Py_XINCREF(indexgroup);
2722 self->indexgroup = indexgroup;
2723
2724 self->weakreflist = NULL;
2725
Guido van Rossum8b762f02008-08-05 03:39:21 +00002726 if (!_validate(self)) {
2727 Py_DECREF(self);
2728 return NULL;
2729 }
2730
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002731 return (PyObject*) self;
2732}
2733
Guido van Rossumb700df92000-03-31 14:59:30 +00002734/* -------------------------------------------------------------------- */
Guido van Rossum8b762f02008-08-05 03:39:21 +00002735/* Code validation */
2736
2737/* To learn more about this code, have a look at the _compile() function in
2738 Lib/sre_compile.py. The validation functions below checks the code array
2739 for conformance with the code patterns generated there.
2740
2741 The nice thing about the generated code is that it is position-independent:
2742 all jumps are relative jumps forward. Also, jumps don't cross each other:
2743 the target of a later jump is always earlier than the target of an earlier
2744 jump. IOW, this is okay:
2745
2746 J---------J-------T--------T
2747 \ \_____/ /
2748 \______________________/
2749
2750 but this is not:
2751
2752 J---------J-------T--------T
2753 \_________\_____/ /
2754 \____________/
2755
Serhiy Storchakafdb73ed2013-10-27 08:00:57 +02002756 It also helps that SRE_CODE is always an unsigned type.
Guido van Rossum8b762f02008-08-05 03:39:21 +00002757*/
2758
2759/* Defining this one enables tracing of the validator */
2760#undef VVERBOSE
2761
2762/* Trace macro for the validator */
2763#if defined(VVERBOSE)
2764#define VTRACE(v) printf v
2765#else
Senthil Kumarand5830682011-10-20 02:13:23 +08002766#define VTRACE(v) do {} while(0) /* do nothing */
Guido van Rossum8b762f02008-08-05 03:39:21 +00002767#endif
2768
2769/* Report failure */
2770#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
2771
2772/* Extract opcode, argument, or skip count from code array */
2773#define GET_OP \
2774 do { \
2775 VTRACE(("%p: ", code)); \
2776 if (code >= end) FAIL; \
2777 op = *code++; \
2778 VTRACE(("%lu (op)\n", (unsigned long)op)); \
2779 } while (0)
2780#define GET_ARG \
2781 do { \
2782 VTRACE(("%p= ", code)); \
2783 if (code >= end) FAIL; \
2784 arg = *code++; \
2785 VTRACE(("%lu (arg)\n", (unsigned long)arg)); \
2786 } while (0)
Guido van Rossume3c4fd92008-09-10 14:27:00 +00002787#define GET_SKIP_ADJ(adj) \
Guido van Rossum8b762f02008-08-05 03:39:21 +00002788 do { \
2789 VTRACE(("%p= ", code)); \
2790 if (code >= end) FAIL; \
2791 skip = *code; \
2792 VTRACE(("%lu (skip to %p)\n", \
2793 (unsigned long)skip, code+skip)); \
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +03002794 if (skip-adj > end-code) \
Guido van Rossum8b762f02008-08-05 03:39:21 +00002795 FAIL; \
2796 code++; \
2797 } while (0)
Guido van Rossume3c4fd92008-09-10 14:27:00 +00002798#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum8b762f02008-08-05 03:39:21 +00002799
2800static int
2801_validate_charset(SRE_CODE *code, SRE_CODE *end)
2802{
2803 /* Some variables are manipulated by the macros above */
2804 SRE_CODE op;
2805 SRE_CODE arg;
2806 SRE_CODE offset;
2807 int i;
2808
2809 while (code < end) {
2810 GET_OP;
2811 switch (op) {
2812
2813 case SRE_OP_NEGATE:
2814 break;
2815
2816 case SRE_OP_LITERAL:
2817 GET_ARG;
2818 break;
2819
2820 case SRE_OP_RANGE:
2821 GET_ARG;
2822 GET_ARG;
2823 break;
2824
2825 case SRE_OP_CHARSET:
2826 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +03002827 if (offset > end-code)
Guido van Rossum8b762f02008-08-05 03:39:21 +00002828 FAIL;
2829 code += offset;
2830 break;
2831
2832 case SRE_OP_BIGCHARSET:
2833 GET_ARG; /* Number of blocks */
2834 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +03002835 if (offset > end-code)
Guido van Rossum8b762f02008-08-05 03:39:21 +00002836 FAIL;
2837 /* Make sure that each byte points to a valid block */
2838 for (i = 0; i < 256; i++) {
2839 if (((unsigned char *)code)[i] >= arg)
2840 FAIL;
2841 }
2842 code += offset;
2843 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +03002844 if (offset > end-code)
Guido van Rossum8b762f02008-08-05 03:39:21 +00002845 FAIL;
2846 code += offset;
2847 break;
2848
2849 case SRE_OP_CATEGORY:
2850 GET_ARG;
2851 switch (arg) {
2852 case SRE_CATEGORY_DIGIT:
2853 case SRE_CATEGORY_NOT_DIGIT:
2854 case SRE_CATEGORY_SPACE:
2855 case SRE_CATEGORY_NOT_SPACE:
2856 case SRE_CATEGORY_WORD:
2857 case SRE_CATEGORY_NOT_WORD:
2858 case SRE_CATEGORY_LINEBREAK:
2859 case SRE_CATEGORY_NOT_LINEBREAK:
2860 case SRE_CATEGORY_LOC_WORD:
2861 case SRE_CATEGORY_LOC_NOT_WORD:
2862 case SRE_CATEGORY_UNI_DIGIT:
2863 case SRE_CATEGORY_UNI_NOT_DIGIT:
2864 case SRE_CATEGORY_UNI_SPACE:
2865 case SRE_CATEGORY_UNI_NOT_SPACE:
2866 case SRE_CATEGORY_UNI_WORD:
2867 case SRE_CATEGORY_UNI_NOT_WORD:
2868 case SRE_CATEGORY_UNI_LINEBREAK:
2869 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
2870 break;
2871 default:
2872 FAIL;
2873 }
2874 break;
2875
2876 default:
2877 FAIL;
2878
2879 }
2880 }
2881
2882 return 1;
2883}
2884
2885static int
2886_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2887{
2888 /* Some variables are manipulated by the macros above */
2889 SRE_CODE op;
2890 SRE_CODE arg;
2891 SRE_CODE skip;
2892
2893 VTRACE(("code=%p, end=%p\n", code, end));
2894
2895 if (code > end)
2896 FAIL;
2897
2898 while (code < end) {
2899 GET_OP;
2900 switch (op) {
2901
2902 case SRE_OP_MARK:
2903 /* We don't check whether marks are properly nested; the
2904 sre_match() code is robust even if they don't, and the worst
2905 you can get is nonsensical match results. */
2906 GET_ARG;
2907 if (arg > 2*groups+1) {
2908 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
2909 FAIL;
2910 }
2911 break;
2912
2913 case SRE_OP_LITERAL:
2914 case SRE_OP_NOT_LITERAL:
2915 case SRE_OP_LITERAL_IGNORE:
2916 case SRE_OP_NOT_LITERAL_IGNORE:
2917 GET_ARG;
2918 /* The arg is just a character, nothing to check */
2919 break;
2920
2921 case SRE_OP_SUCCESS:
2922 case SRE_OP_FAILURE:
2923 /* Nothing to check; these normally end the matching process */
2924 break;
2925
2926 case SRE_OP_AT:
2927 GET_ARG;
2928 switch (arg) {
2929 case SRE_AT_BEGINNING:
2930 case SRE_AT_BEGINNING_STRING:
2931 case SRE_AT_BEGINNING_LINE:
2932 case SRE_AT_END:
2933 case SRE_AT_END_LINE:
2934 case SRE_AT_END_STRING:
2935 case SRE_AT_BOUNDARY:
2936 case SRE_AT_NON_BOUNDARY:
2937 case SRE_AT_LOC_BOUNDARY:
2938 case SRE_AT_LOC_NON_BOUNDARY:
2939 case SRE_AT_UNI_BOUNDARY:
2940 case SRE_AT_UNI_NON_BOUNDARY:
2941 break;
2942 default:
2943 FAIL;
2944 }
2945 break;
2946
2947 case SRE_OP_ANY:
2948 case SRE_OP_ANY_ALL:
2949 /* These have no operands */
2950 break;
2951
2952 case SRE_OP_IN:
2953 case SRE_OP_IN_IGNORE:
2954 GET_SKIP;
2955 /* Stop 1 before the end; we check the FAILURE below */
2956 if (!_validate_charset(code, code+skip-2))
2957 FAIL;
2958 if (code[skip-2] != SRE_OP_FAILURE)
2959 FAIL;
2960 code += skip-1;
2961 break;
2962
2963 case SRE_OP_INFO:
2964 {
2965 /* A minimal info field is
2966 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
2967 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
2968 more follows. */
Brett Cannon8ffe7bb2010-05-03 23:51:28 +00002969 SRE_CODE flags, i;
Guido van Rossum8b762f02008-08-05 03:39:21 +00002970 SRE_CODE *newcode;
2971 GET_SKIP;
2972 newcode = code+skip-1;
2973 GET_ARG; flags = arg;
Brett Cannon8ffe7bb2010-05-03 23:51:28 +00002974 GET_ARG; /* min */
2975 GET_ARG; /* max */
Guido van Rossum8b762f02008-08-05 03:39:21 +00002976 /* Check that only valid flags are present */
2977 if ((flags & ~(SRE_INFO_PREFIX |
2978 SRE_INFO_LITERAL |
2979 SRE_INFO_CHARSET)) != 0)
2980 FAIL;
2981 /* PREFIX and CHARSET are mutually exclusive */
2982 if ((flags & SRE_INFO_PREFIX) &&
2983 (flags & SRE_INFO_CHARSET))
2984 FAIL;
2985 /* LITERAL implies PREFIX */
2986 if ((flags & SRE_INFO_LITERAL) &&
2987 !(flags & SRE_INFO_PREFIX))
2988 FAIL;
2989 /* Validate the prefix */
2990 if (flags & SRE_INFO_PREFIX) {
Brett Cannon8ffe7bb2010-05-03 23:51:28 +00002991 SRE_CODE prefix_len;
Guido van Rossum8b762f02008-08-05 03:39:21 +00002992 GET_ARG; prefix_len = arg;
Brett Cannon8ffe7bb2010-05-03 23:51:28 +00002993 GET_ARG; /* prefix skip */
Guido van Rossum8b762f02008-08-05 03:39:21 +00002994 /* Here comes the prefix string */
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +03002995 if (prefix_len > newcode-code)
Guido van Rossum8b762f02008-08-05 03:39:21 +00002996 FAIL;
2997 code += prefix_len;
2998 /* And here comes the overlap table */
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +03002999 if (prefix_len > newcode-code)
Guido van Rossum8b762f02008-08-05 03:39:21 +00003000 FAIL;
3001 /* Each overlap value should be < prefix_len */
3002 for (i = 0; i < prefix_len; i++) {
3003 if (code[i] >= prefix_len)
3004 FAIL;
3005 }
3006 code += prefix_len;
3007 }
3008 /* Validate the charset */
3009 if (flags & SRE_INFO_CHARSET) {
3010 if (!_validate_charset(code, newcode-1))
3011 FAIL;
3012 if (newcode[-1] != SRE_OP_FAILURE)
3013 FAIL;
3014 code = newcode;
3015 }
3016 else if (code != newcode) {
3017 VTRACE(("code=%p, newcode=%p\n", code, newcode));
3018 FAIL;
3019 }
3020 }
3021 break;
3022
3023 case SRE_OP_BRANCH:
3024 {
3025 SRE_CODE *target = NULL;
3026 for (;;) {
3027 GET_SKIP;
3028 if (skip == 0)
3029 break;
3030 /* Stop 2 before the end; we check the JUMP below */
3031 if (!_validate_inner(code, code+skip-3, groups))
3032 FAIL;
3033 code += skip-3;
3034 /* Check that it ends with a JUMP, and that each JUMP
3035 has the same target */
3036 GET_OP;
3037 if (op != SRE_OP_JUMP)
3038 FAIL;
3039 GET_SKIP;
3040 if (target == NULL)
3041 target = code+skip-1;
3042 else if (code+skip-1 != target)
3043 FAIL;
3044 }
3045 }
3046 break;
3047
3048 case SRE_OP_REPEAT_ONE:
3049 case SRE_OP_MIN_REPEAT_ONE:
3050 {
3051 SRE_CODE min, max;
3052 GET_SKIP;
3053 GET_ARG; min = arg;
3054 GET_ARG; max = arg;
3055 if (min > max)
3056 FAIL;
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02003057 if (max > SRE_MAXREPEAT)
Guido van Rossum8b762f02008-08-05 03:39:21 +00003058 FAIL;
Guido van Rossum8b762f02008-08-05 03:39:21 +00003059 if (!_validate_inner(code, code+skip-4, groups))
3060 FAIL;
3061 code += skip-4;
3062 GET_OP;
3063 if (op != SRE_OP_SUCCESS)
3064 FAIL;
3065 }
3066 break;
3067
3068 case SRE_OP_REPEAT:
3069 {
3070 SRE_CODE min, max;
3071 GET_SKIP;
3072 GET_ARG; min = arg;
3073 GET_ARG; max = arg;
3074 if (min > max)
3075 FAIL;
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02003076 if (max > SRE_MAXREPEAT)
Guido van Rossum8b762f02008-08-05 03:39:21 +00003077 FAIL;
Guido van Rossum8b762f02008-08-05 03:39:21 +00003078 if (!_validate_inner(code, code+skip-3, groups))
3079 FAIL;
3080 code += skip-3;
3081 GET_OP;
3082 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
3083 FAIL;
3084 }
3085 break;
3086
3087 case SRE_OP_GROUPREF:
3088 case SRE_OP_GROUPREF_IGNORE:
3089 GET_ARG;
3090 if (arg >= groups)
3091 FAIL;
3092 break;
3093
3094 case SRE_OP_GROUPREF_EXISTS:
3095 /* The regex syntax for this is: '(?(group)then|else)', where
3096 'group' is either an integer group number or a group name,
3097 'then' and 'else' are sub-regexes, and 'else' is optional. */
3098 GET_ARG;
3099 if (arg >= groups)
3100 FAIL;
Guido van Rossume3c4fd92008-09-10 14:27:00 +00003101 GET_SKIP_ADJ(1);
Guido van Rossum8b762f02008-08-05 03:39:21 +00003102 code--; /* The skip is relative to the first arg! */
3103 /* There are two possibilities here: if there is both a 'then'
3104 part and an 'else' part, the generated code looks like:
3105
3106 GROUPREF_EXISTS
3107 <group>
3108 <skipyes>
3109 ...then part...
3110 JUMP
3111 <skipno>
3112 (<skipyes> jumps here)
3113 ...else part...
3114 (<skipno> jumps here)
3115
3116 If there is only a 'then' part, it looks like:
3117
3118 GROUPREF_EXISTS
3119 <group>
3120 <skip>
3121 ...then part...
3122 (<skip> jumps here)
3123
3124 There is no direct way to decide which it is, and we don't want
3125 to allow arbitrary jumps anywhere in the code; so we just look
3126 for a JUMP opcode preceding our skip target.
3127 */
Serhiy Storchaka616f2fe2013-04-13 21:15:10 +03003128 if (skip >= 3 && skip-3 < end-code &&
Guido van Rossum8b762f02008-08-05 03:39:21 +00003129 code[skip-3] == SRE_OP_JUMP)
3130 {
3131 VTRACE(("both then and else parts present\n"));
3132 if (!_validate_inner(code+1, code+skip-3, groups))
3133 FAIL;
3134 code += skip-2; /* Position after JUMP, at <skipno> */
3135 GET_SKIP;
3136 if (!_validate_inner(code, code+skip-1, groups))
3137 FAIL;
3138 code += skip-1;
3139 }
3140 else {
3141 VTRACE(("only a then part present\n"));
3142 if (!_validate_inner(code+1, code+skip-1, groups))
3143 FAIL;
3144 code += skip-1;
3145 }
3146 break;
3147
3148 case SRE_OP_ASSERT:
3149 case SRE_OP_ASSERT_NOT:
3150 GET_SKIP;
3151 GET_ARG; /* 0 for lookahead, width for lookbehind */
3152 code--; /* Back up over arg to simplify math below */
3153 if (arg & 0x80000000)
3154 FAIL; /* Width too large */
3155 /* Stop 1 before the end; we check the SUCCESS below */
3156 if (!_validate_inner(code+1, code+skip-2, groups))
3157 FAIL;
3158 code += skip-2;
3159 GET_OP;
3160 if (op != SRE_OP_SUCCESS)
3161 FAIL;
3162 break;
3163
3164 default:
3165 FAIL;
3166
3167 }
3168 }
3169
3170 VTRACE(("okay\n"));
3171 return 1;
3172}
3173
3174static int
3175_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
3176{
3177 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
3178 FAIL;
3179 if (groups == 0) /* fix for simplejson */
3180 groups = 100; /* 100 groups should always be safe */
3181 return _validate_inner(code, end-1, groups);
3182}
3183
3184static int
3185_validate(PatternObject *self)
3186{
3187 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
3188 {
3189 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
3190 return 0;
3191 }
3192 else
3193 VTRACE(("Success!\n"));
3194 return 1;
3195}
3196
3197/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00003198/* match methods */
3199
3200static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003201match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00003202{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003203 Py_XDECREF(self->regs);
3204 Py_XDECREF(self->string);
3205 Py_DECREF(self->pattern);
3206 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00003207}
3208
3209static PyObject*
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003210match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00003211{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003212 if (index < 0 || index >= self->groups) {
3213 /* raise IndexError if we were given a bad group number */
3214 PyErr_SetString(
3215 PyExc_IndexError,
3216 "no such group"
3217 );
3218 return NULL;
3219 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003220
Fredrik Lundh6f013982000-07-03 18:44:21 +00003221 index *= 2;
3222
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003223 if (self->string == Py_None || self->mark[index] < 0) {
3224 /* return default value if the string or group is undefined */
3225 Py_INCREF(def);
3226 return def;
3227 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003228
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003229 return PySequence_GetSlice(
3230 self->string, self->mark[index], self->mark[index+1]
3231 );
Guido van Rossumb700df92000-03-31 14:59:30 +00003232}
3233
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003234static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003235match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00003236{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003237 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00003238
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003239 if (PyInt_Check(index))
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003240 return PyInt_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00003241
Fredrik Lundh6f013982000-07-03 18:44:21 +00003242 i = -1;
3243
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003244 if (self->pattern->groupindex) {
3245 index = PyObject_GetItem(self->pattern->groupindex, index);
3246 if (index) {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003247 if (PyInt_Check(index) || PyLong_Check(index))
3248 i = PyInt_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00003249 Py_DECREF(index);
3250 } else
3251 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003252 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00003253
3254 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003255}
3256
3257static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00003258match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003259{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003260 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00003261}
3262
3263static PyObject*
Georg Brandlfbef5882006-05-28 22:14:04 +00003264match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003265{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003266 /* delegate to Python code */
3267 return call(
Neal Norwitz94a9c092006-03-16 06:30:02 +00003268 SRE_PY_MODULE, "_expand",
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003269 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003270 );
3271}
3272
3273static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003274match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003275{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003276 PyObject* result;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003277 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00003278
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003279 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00003280
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003281 switch (size) {
3282 case 0:
3283 result = match_getslice(self, Py_False, Py_None);
3284 break;
3285 case 1:
3286 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
3287 break;
3288 default:
3289 /* fetch multiple items */
3290 result = PyTuple_New(size);
3291 if (!result)
3292 return NULL;
3293 for (i = 0; i < size; i++) {
3294 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00003295 self, PyTuple_GET_ITEM(args, i), Py_None
3296 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003297 if (!item) {
3298 Py_DECREF(result);
3299 return NULL;
3300 }
3301 PyTuple_SET_ITEM(result, i, item);
3302 }
3303 break;
3304 }
3305 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003306}
3307
3308static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00003309match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00003310{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003311 PyObject* result;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003312 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00003313
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003314 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003315 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00003316 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003317 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003318
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003319 result = PyTuple_New(self->groups-1);
3320 if (!result)
3321 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003322
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003323 for (index = 1; index < self->groups; index++) {
3324 PyObject* item;
3325 item = match_getslice_by_index(self, index, def);
3326 if (!item) {
3327 Py_DECREF(result);
3328 return NULL;
3329 }
3330 PyTuple_SET_ITEM(result, index-1, item);
3331 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003332
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003333 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003334}
3335
3336static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00003337match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00003338{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003339 PyObject* result;
3340 PyObject* keys;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003341 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00003342
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003343 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003344 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00003345 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003346 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003347
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003348 result = PyDict_New();
3349 if (!result || !self->pattern->groupindex)
3350 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003351
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003352 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003353 if (!keys)
3354 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00003355
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003356 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00003357 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003358 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00003359 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003360 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003361 if (!key)
3362 goto failed;
3363 value = match_getslice(self, key, def);
3364 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003365 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003366 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003367 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00003368 status = PyDict_SetItem(result, key, value);
3369 Py_DECREF(value);
3370 if (status < 0)
3371 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003372 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003374 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00003375
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003376 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00003377
3378failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00003379 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003380 Py_DECREF(result);
3381 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003382}
3383
3384static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003385match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003386{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003387 Py_ssize_t index;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003389 PyObject* index_ = Py_False; /* zero */
Georg Brandl96a8c392006-05-29 21:04:52 +00003390 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003391 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003392
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003393 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003395 if (index < 0 || index >= self->groups) {
3396 PyErr_SetString(
3397 PyExc_IndexError,
3398 "no such group"
3399 );
3400 return NULL;
3401 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003402
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003403 /* mark is -1 if group is undefined */
Benjamin Peterson9dccb012013-01-10 10:37:47 -06003404 return PyInt_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00003405}
3406
3407static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003408match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003409{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003410 Py_ssize_t index;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003411
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003412 PyObject* index_ = Py_False; /* zero */
Georg Brandl96a8c392006-05-29 21:04:52 +00003413 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003414 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003415
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003416 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003417
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003418 if (index < 0 || index >= self->groups) {
3419 PyErr_SetString(
3420 PyExc_IndexError,
3421 "no such group"
3422 );
3423 return NULL;
3424 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003425
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003426 /* mark is -1 if group is undefined */
Benjamin Peterson9dccb012013-01-10 10:37:47 -06003427 return PyInt_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003428}
3429
3430LOCAL(PyObject*)
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003431_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003432{
3433 PyObject* pair;
3434 PyObject* item;
3435
3436 pair = PyTuple_New(2);
3437 if (!pair)
3438 return NULL;
3439
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003440 item = PyInt_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003441 if (!item)
3442 goto error;
3443 PyTuple_SET_ITEM(pair, 0, item);
3444
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003445 item = PyInt_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003446 if (!item)
3447 goto error;
3448 PyTuple_SET_ITEM(pair, 1, item);
3449
3450 return pair;
3451
3452 error:
3453 Py_DECREF(pair);
3454 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003455}
3456
3457static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003458match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003459{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003460 Py_ssize_t index;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003461
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003462 PyObject* index_ = Py_False; /* zero */
Georg Brandl96a8c392006-05-29 21:04:52 +00003463 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003464 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003465
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003466 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003467
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003468 if (index < 0 || index >= self->groups) {
3469 PyErr_SetString(
3470 PyExc_IndexError,
3471 "no such group"
3472 );
3473 return NULL;
3474 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003475
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003476 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003477 return _pair(self->mark[index*2], self->mark[index*2+1]);
3478}
3479
3480static PyObject*
3481match_regs(MatchObject* self)
3482{
3483 PyObject* regs;
3484 PyObject* item;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003485 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003486
3487 regs = PyTuple_New(self->groups);
3488 if (!regs)
3489 return NULL;
3490
3491 for (index = 0; index < self->groups; index++) {
3492 item = _pair(self->mark[index*2], self->mark[index*2+1]);
3493 if (!item) {
3494 Py_DECREF(regs);
3495 return NULL;
3496 }
3497 PyTuple_SET_ITEM(regs, index, item);
3498 }
3499
3500 Py_INCREF(regs);
3501 self->regs = regs;
3502
3503 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00003504}
3505
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003506static PyObject*
Georg Brandl964f5972006-05-28 22:38:57 +00003507match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003508{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003509#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003510 MatchObject* copy;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003511 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00003512
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003513 slots = 2 * (self->pattern->groups+1);
3514
3515 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3516 if (!copy)
3517 return NULL;
3518
3519 /* this value a constant, but any compiler should be able to
3520 figure that out all by itself */
3521 offset = offsetof(MatchObject, string);
3522
3523 Py_XINCREF(self->pattern);
3524 Py_XINCREF(self->string);
3525 Py_XINCREF(self->regs);
3526
3527 memcpy((char*) copy + offset, (char*) self + offset,
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003528 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003529
3530 return (PyObject*) copy;
3531#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003532 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003533 return NULL;
3534#endif
3535}
3536
3537static PyObject*
Georg Brandlfbef5882006-05-28 22:14:04 +00003538match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003539{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003540#ifdef USE_BUILTIN_COPY
3541 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00003542
Georg Brandlfbef5882006-05-28 22:14:04 +00003543 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003544 if (!copy)
3545 return NULL;
3546
3547 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3548 !deepcopy(&copy->string, memo) ||
3549 !deepcopy(&copy->regs, memo)) {
3550 Py_DECREF(copy);
3551 return NULL;
3552 }
3553
3554#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003555 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3556 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003557#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003558}
3559
Andrew Svetlov1c6c90f2012-12-23 20:09:01 +02003560PyDoc_STRVAR(match_doc,
3561"The result of re.match() and re.search().\n\
3562Match objects always have a boolean value of True.");
3563
3564PyDoc_STRVAR(match_group_doc,
3565"group([group1, ...]) -> str or tuple.\n\
3566 Return subgroup(s) of the match by indices or names.\n\
3567 For 0 returns the entire match.");
3568
3569PyDoc_STRVAR(match_start_doc,
3570"start([group=0]) -> int.\n\
3571 Return index of the start of the substring matched by group.");
3572
3573PyDoc_STRVAR(match_end_doc,
3574"end([group=0]) -> int.\n\
3575 Return index of the end of the substring matched by group.");
3576
3577PyDoc_STRVAR(match_span_doc,
3578"span([group]) -> tuple.\n\
3579 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
3580
3581PyDoc_STRVAR(match_groups_doc,
3582"groups([default=None]) -> tuple.\n\
3583 Return a tuple containing all the subgroups of the match, from 1.\n\
3584 The default argument is used for groups\n\
3585 that did not participate in the match");
3586
3587PyDoc_STRVAR(match_groupdict_doc,
3588"groupdict([default=None]) -> dict.\n\
3589 Return a dictionary containing all the named subgroups of the match,\n\
3590 keyed by the subgroup name. The default argument is used for groups\n\
3591 that did not participate in the match");
3592
3593PyDoc_STRVAR(match_expand_doc,
3594"expand(template) -> str.\n\
3595 Return the string obtained by doing backslash substitution\n\
3596 on the string template, as done by the sub() method.");
3597
3598static PyMethodDef match_methods[] = {
3599 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
3600 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
3601 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
3602 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
3603 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
3604 match_groups_doc},
3605 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
3606 match_groupdict_doc},
3607 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Georg Brandlfbef5882006-05-28 22:14:04 +00003608 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3609 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003610 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003611};
3612
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003613static PyObject *
3614match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00003615{
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003616 if (self->lastindex >= 0)
Benjamin Peterson9dccb012013-01-10 10:37:47 -06003617 return PyInt_FromSsize_t(self->lastindex);
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003618 Py_INCREF(Py_None);
3619 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00003620}
3621
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003622static PyObject *
3623match_lastgroup_get(MatchObject *self)
3624{
3625 if (self->pattern->indexgroup && self->lastindex >= 0) {
3626 PyObject* result = PySequence_GetItem(
3627 self->pattern->indexgroup, self->lastindex
3628 );
3629 if (result)
3630 return result;
3631 PyErr_Clear();
3632 }
3633 Py_INCREF(Py_None);
3634 return Py_None;
3635}
3636
3637static PyObject *
3638match_regs_get(MatchObject *self)
3639{
3640 if (self->regs) {
3641 Py_INCREF(self->regs);
3642 return self->regs;
3643 } else
3644 return match_regs(self);
3645}
3646
3647static PyGetSetDef match_getset[] = {
3648 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
3649 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
3650 {"regs", (getter)match_regs_get, (setter)NULL},
3651 {NULL}
3652};
3653
3654#define MATCH_OFF(x) offsetof(MatchObject, x)
3655static PyMemberDef match_members[] = {
3656 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
3657 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
3658 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
3659 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
3660 {NULL}
3661};
3662
3663
Guido van Rossumb700df92000-03-31 14:59:30 +00003664/* FIXME: implement setattr("string", None) as a special case (to
3665 detach the associated string, if any */
3666
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003667static PyTypeObject Match_Type = {
3668 PyVarObject_HEAD_INIT(NULL, 0)
3669 "_" SRE_MODULE ".SRE_Match",
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003670 sizeof(MatchObject), sizeof(Py_ssize_t),
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003671 (destructor)match_dealloc, /* tp_dealloc */
3672 0, /* tp_print */
3673 0, /* tp_getattr */
3674 0, /* tp_setattr */
3675 0, /* tp_compare */
3676 0, /* tp_repr */
3677 0, /* tp_as_number */
3678 0, /* tp_as_sequence */
3679 0, /* tp_as_mapping */
3680 0, /* tp_hash */
3681 0, /* tp_call */
3682 0, /* tp_str */
3683 0, /* tp_getattro */
3684 0, /* tp_setattro */
3685 0, /* tp_as_buffer */
3686 Py_TPFLAGS_DEFAULT,
Andrew Svetlov1c6c90f2012-12-23 20:09:01 +02003687 match_doc, /* tp_doc */
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003688 0, /* tp_traverse */
3689 0, /* tp_clear */
3690 0, /* tp_richcompare */
3691 0, /* tp_weaklistoffset */
3692 0, /* tp_iter */
3693 0, /* tp_iternext */
3694 match_methods, /* tp_methods */
3695 match_members, /* tp_members */
3696 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00003697};
3698
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003699static PyObject*
3700pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3701{
3702 /* create match object (from state object) */
3703
3704 MatchObject* match;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003705 Py_ssize_t i, j;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003706 char* base;
3707 int n;
3708
3709 if (status > 0) {
3710
3711 /* create match object (with room for extra group marks) */
Christian Heimes4956d2b2008-01-18 19:12:56 +00003712 /* coverity[ampersand_in_size] */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003713 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3714 2*(pattern->groups+1));
3715 if (!match)
3716 return NULL;
3717
3718 Py_INCREF(pattern);
3719 match->pattern = pattern;
3720
3721 Py_INCREF(state->string);
3722 match->string = state->string;
3723
3724 match->regs = NULL;
3725 match->groups = pattern->groups+1;
3726
3727 /* fill in group slices */
3728
3729 base = (char*) state->beginning;
3730 n = state->charsize;
3731
3732 match->mark[0] = ((char*) state->start - base) / n;
3733 match->mark[1] = ((char*) state->ptr - base) / n;
3734
3735 for (i = j = 0; i < pattern->groups; i++, j+=2)
3736 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3737 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3738 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3739 } else
3740 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3741
3742 match->pos = state->pos;
3743 match->endpos = state->endpos;
3744
3745 match->lastindex = state->lastindex;
3746
3747 return (PyObject*) match;
3748
3749 } else if (status == 0) {
3750
3751 /* no match */
3752 Py_INCREF(Py_None);
3753 return Py_None;
3754
3755 }
3756
3757 /* internal error */
3758 pattern_error(status);
3759 return NULL;
3760}
3761
3762
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003763/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003764/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003765
3766static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003767scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003768{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003769 state_fini(&self->state);
Antoine Pitrouefdddd32010-01-14 17:25:24 +00003770 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003771 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003772}
3773
3774static PyObject*
Georg Brandl964f5972006-05-28 22:38:57 +00003775scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003776{
3777 SRE_STATE* state = &self->state;
3778 PyObject* match;
3779 int status;
3780
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003781 state_reset(state);
3782
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003783 state->ptr = state->start;
3784
3785 if (state->charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003786 status = sre_match(state, PatternObject_GetCode(self->pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003787 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003788#if defined(HAVE_UNICODE)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003789 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003790#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003791 }
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00003792 if (PyErr_Occurred())
3793 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003794
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003795 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003796 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003797
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003798 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003799 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003800 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003801 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003802
3803 return match;
3804}
3805
3806
3807static PyObject*
Georg Brandl964f5972006-05-28 22:38:57 +00003808scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003809{
3810 SRE_STATE* state = &self->state;
3811 PyObject* match;
3812 int status;
3813
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003814 state_reset(state);
3815
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003816 state->ptr = state->start;
3817
3818 if (state->charsize == 1) {
3819 status = sre_search(state, PatternObject_GetCode(self->pattern));
3820 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003821#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003822 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003823#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003824 }
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00003825 if (PyErr_Occurred())
3826 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003827
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003828 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003829 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003830
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003831 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003832 state->start = (void*) ((char*) state->ptr + state->charsize);
3833 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003834 state->start = state->ptr;
3835
3836 return match;
3837}
3838
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003839static PyMethodDef scanner_methods[] = {
Georg Brandlfbef5882006-05-28 22:14:04 +00003840 {"match", (PyCFunction) scanner_match, METH_NOARGS},
3841 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003842 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003843};
3844
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003845#define SCAN_OFF(x) offsetof(ScannerObject, x)
3846static PyMemberDef scanner_members[] = {
3847 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
3848 {NULL} /* Sentinel */
3849};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003850
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003851statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003852 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00003853 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003854 sizeof(ScannerObject), 0,
3855 (destructor)scanner_dealloc, /*tp_dealloc*/
Benjamin Peterson6116d4a2011-05-17 18:31:20 -05003856 0, /* tp_print */
3857 0, /* tp_getattr */
3858 0, /* tp_setattr */
3859 0, /* tp_reserved */
3860 0, /* tp_repr */
3861 0, /* tp_as_number */
3862 0, /* tp_as_sequence */
3863 0, /* tp_as_mapping */
3864 0, /* tp_hash */
3865 0, /* tp_call */
3866 0, /* tp_str */
3867 0, /* tp_getattro */
3868 0, /* tp_setattro */
3869 0, /* tp_as_buffer */
3870 Py_TPFLAGS_DEFAULT, /* tp_flags */
3871 0, /* tp_doc */
3872 0, /* tp_traverse */
3873 0, /* tp_clear */
3874 0, /* tp_richcompare */
3875 0, /* tp_weaklistoffset */
3876 0, /* tp_iter */
3877 0, /* tp_iternext */
3878 scanner_methods, /* tp_methods */
3879 scanner_members, /* tp_members */
3880 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003881};
3882
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003883static PyObject*
3884pattern_scanner(PatternObject* pattern, PyObject* args)
3885{
3886 /* create search state object */
3887
3888 ScannerObject* self;
3889
3890 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003891 Py_ssize_t start = 0;
3892 Py_ssize_t end = PY_SSIZE_T_MAX;
3893 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003894 return NULL;
3895
3896 /* create scanner object */
3897 self = PyObject_NEW(ScannerObject, &Scanner_Type);
3898 if (!self)
3899 return NULL;
Antoine Pitrouefdddd32010-01-14 17:25:24 +00003900 self->pattern = NULL;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003901
3902 string = state_init(&self->state, pattern, string, start, end);
3903 if (!string) {
Antoine Pitrouefdddd32010-01-14 17:25:24 +00003904 Py_DECREF(self);
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003905 return NULL;
3906 }
3907
3908 Py_INCREF(pattern);
3909 self->pattern = (PyObject*) pattern;
3910
3911 return (PyObject*) self;
3912}
3913
Guido van Rossumb700df92000-03-31 14:59:30 +00003914static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00003915 {"compile", _compile, METH_VARARGS},
Georg Brandlfbef5882006-05-28 22:14:04 +00003916 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00003917 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003918 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003919};
3920
Tim Peters3d563502006-01-21 02:47:53 +00003921#if PY_VERSION_HEX < 0x02030000
Andrew M. Kuchlingc24fe362003-04-30 13:09:08 +00003922DL_EXPORT(void) init_sre(void)
3923#else
Mark Hammond8235ea12002-07-19 06:55:41 +00003924PyMODINIT_FUNC init_sre(void)
Andrew M. Kuchlingc24fe362003-04-30 13:09:08 +00003925#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00003926{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003927 PyObject* m;
3928 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00003929 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003930
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003931 /* Patch object types */
Benjamin Petersone266d3e2010-04-06 03:34:09 +00003932 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
3933 PyType_Ready(&Scanner_Type))
3934 return;
Guido van Rossumb700df92000-03-31 14:59:30 +00003935
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00003936 m = Py_InitModule("_" SRE_MODULE, _functions);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00003937 if (m == NULL)
3938 return;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003939 d = PyModule_GetDict(m);
3940
Fredrik Lundh21009b92001-09-18 18:47:09 +00003941 x = PyInt_FromLong(SRE_MAGIC);
3942 if (x) {
3943 PyDict_SetItemString(d, "MAGIC", x);
3944 Py_DECREF(x);
3945 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003946
Martin v. Löwis78e2f062003-04-19 12:56:08 +00003947 x = PyInt_FromLong(sizeof(SRE_CODE));
3948 if (x) {
3949 PyDict_SetItemString(d, "CODESIZE", x);
3950 Py_DECREF(x);
3951 }
3952
Serhiy Storchakae18e05c2013-02-16 16:47:15 +02003953 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
3954 if (x) {
3955 PyDict_SetItemString(d, "MAXREPEAT", x);
3956 Py_DECREF(x);
3957 }
3958
Gregory P. Smithdd96db62008-06-09 04:58:54 +00003959 x = PyString_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00003960 if (x) {
3961 PyDict_SetItemString(d, "copyright", x);
3962 Py_DECREF(x);
3963 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003964}
3965
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003966#endif /* !defined(SRE_RECURSIVE) */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00003967
3968/* vim:ts=4:sw=4:et
3969*/