blob: e76144de598002db0ca3bfdaf94bfdc1d705521a [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl  created (based on existing template matcher code)
 * 2000-03-06 fl  first alpha, sort of
 * 2000-08-01 fl  fixes for 1.6b1
 * 2000-08-07 fl  use PyOS_CheckStack() if available
 * 2000-09-20 fl  added expand method
 * 2001-03-20 fl  lots of fixes for 2.1b2
 * 2001-04-15 fl  export copyright as Python attribute, not global
 * 2001-04-28 fl  added __copy__ methods (work in progress)
 * 2001-05-14 fl  fixes for 1.5.2 compatibility
 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl  added sub/subn primitive
 * 2001-10-24 fl  added finditer primitive (for 2.2 only)
 * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl  fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn  implemented non recursive scheme
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
36
37#ifndef SRE_RECURSIVE
38
/* Version and copyright banner of this engine build. */
static char copyright[] = " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000041
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042#define PY_SSIZE_T_CLEAN
43
Guido van Rossumb700df92000-03-31 14:59:30 +000044#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000045#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000046
47#include "sre.h"
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
/* Name of this module, minus the leading underscore.  A build may
   predefine SRE_MODULE; the fallback below is used otherwise. */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* Companion module name (presumably the Python-level front end --
   confirm at the point of use). */
#define SRE_PY_MODULE "re"

/* Tracing is off: replace this #undef with a #define to enable it. */
#undef VERBOSE

/* Defining this enables unicode support (default under 1.6a1 and later). */
#define HAVE_UNICODE

/* -------------------------------------------------------------------- */
/* optional features */

/* Enables fast searching. */
#define USE_FAST_SEARCH

/* Copy/deepcopy handling (work in progress) is disabled. */
#undef USE_BUILTIN_COPY

/* Interpreters older than 1.6 lack PyObject_DEL; map it onto PyMem_DEL. */
#if PY_VERSION_HEX < 0x01060000
#define PyObject_DEL(op) PyMem_DEL((op))
#endif
76
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000077/* -------------------------------------------------------------------- */
78
/* LOCAL(type) declares a file-private helper returning `type`, using the
   cheapest calling convention the compiler offers. */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif

/* Negative status codes returned by the engine. */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */

/* TRACE((fmt, ...)) prints through printf when VERBOSE is defined and
   expands to nothing otherwise; note the doubled parentheses. */
#ifdef VERBOSE
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
102
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000103/* -------------------------------------------------------------------- */
104/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000105
/* default character predicates (run sre_chars.py to regenerate tables) */

/* Bit flags stored in sre_char_info[]; one entry may carry several. */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* Per-character class flags for code points 0..127, indexed by character
   code.  The table is read-only, hence const. */
static const char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
123
/* ASCII lower-casing table for code points 0..127: 'A'..'Z' (65..90) map
   to 'a'..'z' (97..122), every other entry maps to itself.  The table is
   read-only, hence const. */
static const char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127 };
133
/* Table-driven ASCII predicates.  Each yields the matching flag bit from
   sre_char_info[] (nonzero, not necessarily 1) and yields 0 for any
   character code of 128 or above.  `ch` is evaluated more than once. */
#define SRE_IS_DIGIT(ch)\
    ((ch) >= 128 ? 0 : (sre_char_info[(ch)] & SRE_DIGIT_MASK))
#define SRE_IS_SPACE(ch)\
    ((ch) >= 128 ? 0 : (sre_char_info[(ch)] & SRE_SPACE_MASK))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) >= 128 ? 0 : (sre_char_info[(ch)] & SRE_LINEBREAK_MASK))
#define SRE_IS_ALNUM(ch)\
    ((ch) >= 128 ? 0 : (sre_char_info[(ch)] & SRE_ALNUM_MASK))
#define SRE_IS_WORD(ch)\
    ((ch) >= 128 ? 0 : (sre_char_info[(ch)] & SRE_WORD_MASK))
Guido van Rossumb700df92000-03-31 14:59:30 +0000144
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000145static unsigned int sre_lower(unsigned int ch)
146{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000147 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000148}
149
/* locale-specific character predicates */
/* The ((ch) & ~255) test is nonzero exactly when an unsigned ch is 256 or
 * more; it is written this way, rather than as a comparison, to avoid
 * warnings when ch's type cannot hold values that large.  Codes outside
 * 0..255 are never handed to the <ctype.h> classifiers. */
#define SRE_LOC_IS_DIGIT(ch) (((ch) & ~255) ? 0 : isdigit((ch)))
#define SRE_LOC_IS_SPACE(ch) (((ch) & ~255) ? 0 : isspace((ch)))
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) (((ch) & ~255) ? 0 : isalnum((ch)))
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
158
/* Lower-case `ch` with the C library's tolower(); codes of 256 and above
   are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
163
/* unicode-specific character predicates */

#ifdef HAVE_UNICODE

/* Each predicate narrows `ch` to Py_UNICODE and defers to the matching
   Py_UNICODE_* classifier; '_' additionally counts as a word character. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case `ch` through Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    Py_UNICODE narrowed = (Py_UNICODE)(ch);

    return (unsigned int) Py_UNICODE_TOLOWER(narrowed);
}

#endif
180
Guido van Rossumb700df92000-03-31 14:59:30 +0000181LOCAL(int)
182sre_category(SRE_CODE category, unsigned int ch)
183{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000184 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000185
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000186 case SRE_CATEGORY_DIGIT:
187 return SRE_IS_DIGIT(ch);
188 case SRE_CATEGORY_NOT_DIGIT:
189 return !SRE_IS_DIGIT(ch);
190 case SRE_CATEGORY_SPACE:
191 return SRE_IS_SPACE(ch);
192 case SRE_CATEGORY_NOT_SPACE:
193 return !SRE_IS_SPACE(ch);
194 case SRE_CATEGORY_WORD:
195 return SRE_IS_WORD(ch);
196 case SRE_CATEGORY_NOT_WORD:
197 return !SRE_IS_WORD(ch);
198 case SRE_CATEGORY_LINEBREAK:
199 return SRE_IS_LINEBREAK(ch);
200 case SRE_CATEGORY_NOT_LINEBREAK:
201 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000202
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000203 case SRE_CATEGORY_LOC_WORD:
204 return SRE_LOC_IS_WORD(ch);
205 case SRE_CATEGORY_LOC_NOT_WORD:
206 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000207
208#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000209 case SRE_CATEGORY_UNI_DIGIT:
210 return SRE_UNI_IS_DIGIT(ch);
211 case SRE_CATEGORY_UNI_NOT_DIGIT:
212 return !SRE_UNI_IS_DIGIT(ch);
213 case SRE_CATEGORY_UNI_SPACE:
214 return SRE_UNI_IS_SPACE(ch);
215 case SRE_CATEGORY_UNI_NOT_SPACE:
216 return !SRE_UNI_IS_SPACE(ch);
217 case SRE_CATEGORY_UNI_WORD:
218 return SRE_UNI_IS_WORD(ch);
219 case SRE_CATEGORY_UNI_NOT_WORD:
220 return !SRE_UNI_IS_WORD(ch);
221 case SRE_CATEGORY_UNI_LINEBREAK:
222 return SRE_UNI_IS_LINEBREAK(ch);
223 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
224 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000225#else
226 case SRE_CATEGORY_UNI_DIGIT:
227 return SRE_IS_DIGIT(ch);
228 case SRE_CATEGORY_UNI_NOT_DIGIT:
229 return !SRE_IS_DIGIT(ch);
230 case SRE_CATEGORY_UNI_SPACE:
231 return SRE_IS_SPACE(ch);
232 case SRE_CATEGORY_UNI_NOT_SPACE:
233 return !SRE_IS_SPACE(ch);
234 case SRE_CATEGORY_UNI_WORD:
235 return SRE_LOC_IS_WORD(ch);
236 case SRE_CATEGORY_UNI_NOT_WORD:
237 return !SRE_LOC_IS_WORD(ch);
238 case SRE_CATEGORY_UNI_LINEBREAK:
239 return SRE_IS_LINEBREAK(ch);
240 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
241 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000242#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000243 }
244 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000245}
246
247/* helpers */
248
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000249static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000250data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000251{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000252 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000253 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000254 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000255 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000256 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000257}
258
259static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000260data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000261{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000262 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000263 minsize = state->data_stack_base+size;
264 cursize = state->data_stack_size;
265 if (cursize < minsize) {
266 void* stack;
267 cursize = minsize+minsize/4+1024;
268 TRACE(("allocate/grow stack %d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000269 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000270 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000271 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000272 return SRE_ERROR_MEMORY;
273 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000275 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000276 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000277 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000278}
279
/* generate 8-bit version */

/* Names the generic engine section takes when compiled for 8-bit
   (unsigned char) subjects. */
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_MATCH_CONTEXT sre_match_context
#define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template

#ifdef HAVE_UNICODE

/* Re-include this file with SRE_RECURSIVE set so that the 8-bit names
   above are applied to the generic section. */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

/* Drop the 8-bit names again... */
#undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_MATCH_CONTEXT
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* ...and generate the unicode (Py_UNICODE) version */

#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_MATCH_CONTEXT sre_umatch_context
#define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000320
321#endif /* SRE_RECURSIVE */
322
323/* -------------------------------------------------------------------- */
324/* String matching engine */
325
326/* the following section is compiled twice, with different character
327 settings */
328
329LOCAL(int)
330SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
331{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000332 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000333
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000334 Py_ssize_t thisp, thatp;
Guido van Rossumb700df92000-03-31 14:59:30 +0000335
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000336 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000337
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000338 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000339 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000340 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000341
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 case SRE_AT_BEGINNING_LINE:
343 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000344 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000345
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000346 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000347 return (((void*) (ptr+1) == state->end &&
348 SRE_IS_LINEBREAK((int) ptr[0])) ||
349 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000350
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000351 case SRE_AT_END_LINE:
352 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000353 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000354
Fredrik Lundh770617b2001-01-14 15:06:11 +0000355 case SRE_AT_END_STRING:
356 return ((void*) ptr == state->end);
357
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000358 case SRE_AT_BOUNDARY:
359 if (state->beginning == state->end)
360 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000361 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000362 SRE_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000363 thisp = ((void*) ptr < state->end) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000364 SRE_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000365 return thisp != thatp;
Fredrik Lundh80946112000-06-29 18:03:25 +0000366
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000367 case SRE_AT_NON_BOUNDARY:
368 if (state->beginning == state->end)
369 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000370 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000371 SRE_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000372 thisp = ((void*) ptr < state->end) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000373 SRE_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000374 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000375
376 case SRE_AT_LOC_BOUNDARY:
377 if (state->beginning == state->end)
378 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000379 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000380 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000381 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000382 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000383 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000384
385 case SRE_AT_LOC_NON_BOUNDARY:
386 if (state->beginning == state->end)
387 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000388 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000389 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000390 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000391 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000392 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000393
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000394#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000395 case SRE_AT_UNI_BOUNDARY:
396 if (state->beginning == state->end)
397 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000398 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000399 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000400 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000401 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000402 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000403
404 case SRE_AT_UNI_NON_BOUNDARY:
405 if (state->beginning == state->end)
406 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000407 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000408 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000409 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000410 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000411 return thisp == thatp;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000412#endif
413
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000414 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000415
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000416 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000417}
418
419LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000420SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000421{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000423
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000424 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000425
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000426 for (;;) {
427 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000428
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000429 case SRE_OP_FAILURE:
430 return !ok;
431
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000432 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000433 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000434 if (ch == set[0])
435 return ok;
436 set++;
437 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000438
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000439 case SRE_OP_CATEGORY:
440 /* <CATEGORY> <code> */
441 if (sre_category(set[0], (int) ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000442 return ok;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000443 set += 1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000445
Fredrik Lundh3562f112000-07-02 12:00:07 +0000446 case SRE_OP_CHARSET:
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000447 if (sizeof(SRE_CODE) == 2) {
448 /* <CHARSET> <bitmap> (16 bits per code word) */
449 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
450 return ok;
451 set += 16;
Tim Peters3d563502006-01-21 02:47:53 +0000452 }
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000453 else {
454 /* <CHARSET> <bitmap> (32 bits per code word) */
Gregory P. Smith90555d02012-12-10 17:44:44 -0800455 if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31))))
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000456 return ok;
457 set += 8;
458 }
Fredrik Lundh3562f112000-07-02 12:00:07 +0000459 break;
460
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000461 case SRE_OP_RANGE:
462 /* <RANGE> <lower> <upper> */
463 if (set[0] <= ch && ch <= set[1])
464 return ok;
465 set += 2;
466 break;
467
468 case SRE_OP_NEGATE:
469 ok = !ok;
470 break;
471
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000472 case SRE_OP_BIGCHARSET:
473 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
474 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475 Py_ssize_t count, block;
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000476 count = *(set++);
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000477
478 if (sizeof(SRE_CODE) == 2) {
479 block = ((unsigned char*)set)[ch >> 8];
480 set += 128;
481 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
482 return ok;
483 set += count*16;
484 }
485 else {
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000486 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
487 * warnings when c's type supports only numbers < N+1 */
488 if (!(ch & ~65535))
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000489 block = ((unsigned char*)set)[ch >> 8];
490 else
491 block = -1;
492 set += 64;
Tim Peters3d563502006-01-21 02:47:53 +0000493 if (block >=0 &&
Gregory P. Smith90555d02012-12-10 17:44:44 -0800494 (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31))))
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000495 return ok;
496 set += count*8;
497 }
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000498 break;
499 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000500
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000501 default:
502 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000503 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000504 return 0;
505 }
506 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000507}
508
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000509LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000510
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000511LOCAL(Py_ssize_t)
512SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000513{
514 SRE_CODE chr;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000515 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
516 SRE_CHAR* end = (SRE_CHAR *)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000517 Py_ssize_t i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000518
519 /* adjust end */
Serhiy Storchaka70ca0212013-02-16 16:47:47 +0200520 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000521 end = ptr + maxcount;
522
523 switch (pattern[0]) {
524
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000525 case SRE_OP_IN:
526 /* repeated set */
527 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
528 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
529 ptr++;
530 break;
531
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000532 case SRE_OP_ANY:
533 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000534 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000535 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
536 ptr++;
537 break;
538
539 case SRE_OP_ANY_ALL:
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000540 /* repeated dot wildcard. skip to the end of the target
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000541 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000542 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000543 ptr = end;
544 break;
545
546 case SRE_OP_LITERAL:
547 /* repeated literal */
548 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000549 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000550 while (ptr < end && (SRE_CODE) *ptr == chr)
551 ptr++;
552 break;
553
554 case SRE_OP_LITERAL_IGNORE:
555 /* repeated literal */
556 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000557 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000558 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
559 ptr++;
560 break;
561
562 case SRE_OP_NOT_LITERAL:
563 /* repeated non-literal */
564 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000565 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000566 while (ptr < end && (SRE_CODE) *ptr != chr)
567 ptr++;
568 break;
Tim Peters3d563502006-01-21 02:47:53 +0000569
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000570 case SRE_OP_NOT_LITERAL_IGNORE:
571 /* repeated non-literal */
572 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000573 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000574 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
575 ptr++;
576 break;
577
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000578 default:
579 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000581 while ((SRE_CHAR*) state->ptr < end) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000582 i = SRE_MATCH(state, pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000583 if (i < 0)
584 return i;
585 if (!i)
586 break;
587 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000588 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
589 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000590 return (SRE_CHAR*) state->ptr - ptr;
591 }
592
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000593 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000594 return ptr - (SRE_CHAR*) state->ptr;
595}
596
#if 0 /* not used in this release */
/* Check whether an SRE_OP_INFO block matches at the current position.
   Returns the number of SRE_CODE objects to skip if successful, 0 if
   there is no match. */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    SRE_CHAR* limit = state->end;
    SRE_CHAR* cur = state->ptr;
    Py_ssize_t k;

    /* check minimal length */
    if (pattern[3] && (limit - cur) < pattern[3])
        return 0;

    /* no usable prefix recorded: just skip the info block */
    if (!((pattern[2] & SRE_INFO_PREFIX) && pattern[5] > 1))
        return pattern[0];

    /* check known prefix */
    /* <length> <skip> <prefix data> <overlap data> */
    for (k = 0; k < pattern[5]; k++) {
        if ((SRE_CODE) cur[k] != pattern[7 + k])
            return 0;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000624
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +0000625/* The macros below should be used to protect recursive SRE_MATCH()
626 * calls that *failed* and do *not* return immediately (IOW, those
627 * that will backtrack). Explaining:
628 *
629 * - Recursive SRE_MATCH() returned true: that's usually a success
630 * (besides atypical cases like ASSERT_NOT), therefore there's no
631 * reason to restore lastmark;
632 *
633 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
634 * is returning to the caller: If the current SRE_MATCH() is the
635 * top function of the recursion, returning false will be a matching
636 * failure, and it doesn't matter where lastmark is pointing to.
637 * If it's *not* the top function, it will be a recursive SRE_MATCH()
638 * failure by itself, and the calling SRE_MATCH() will have to deal
639 * with the failure by the same rules explained here (it will restore
640 * lastmark by itself if necessary);
641 *
642 * - Recursive SRE_MATCH() returned false, and will continue the
643 * outside 'for' loop: must be protected when breaking, since the next
644 * OP could potentially depend on lastmark;
Tim Peters3d563502006-01-21 02:47:53 +0000645 *
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +0000646 * - Recursive SRE_MATCH() returned false, and will be called again
647 * inside a local for/while loop: must be protected between each
648 * loop iteration, since the recursive SRE_MATCH() could do anything,
649 * and could potentially depend on lastmark.
650 *
651 * For more information, check the discussion at SF patch #712900.
652 */
/* Snapshot the group bookkeeping (state->lastmark and state->lastindex)
   into the active match context so that LASTMARK_RESTORE() can roll it
   back after a failed sub-match.  Expects `state` and `ctx` to be in
   scope where the macro is expanded. */
#define LASTMARK_SAVE() \
    do { \
        ctx->lastindex = state->lastindex; \
        ctx->lastmark = state->lastmark; \
    } while (0)
/* Roll state->lastmark and state->lastindex back to the values that
   LASTMARK_SAVE() stored in the active match context.  Expects `state`
   and `ctx` to be in scope where the macro is expanded. */
#define LASTMARK_RESTORE() \
    do { \
        state->lastindex = ctx->lastindex; \
        state->lastmark = ctx->lastmark; \
    } while (0)
663
/* Early-exit helpers for the matcher.

   RETURN_ERROR(i)   returns `i` from the enclosing function immediately.
   RETURN_FAILURE    sets `ret` to 0 and jumps to the `exit` label.
   RETURN_SUCCESS    sets `ret` to 1 and jumps to the `exit` label.

   The RETURN_ON_* variants inspect a result code: negative values are
   errors, zero is failure, positive is success.  The argument is
   evaluated more than once, so pass a side-effect-free expression.
   It is parenthesized in every use so that compound expressions
   (e.g. `a ? b : c`) are tested as a whole. */
#define RETURN_ERROR(i) do { return (i); } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)

#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
674
/* Stringify the argument token(s) as written: SFY(foo) yields "foo". */
#define SFY(token) #token
676
/* Reserve sizeof(type) bytes on top of the data stack of `state` and
   store a pointer to the reserved slot in `ptr`.

   Side effects at the expansion site:
     - `alloc_pos` receives the offset of the new slot;
     - if the stack is too small it is grown with data_stack_grow(); a
       negative result is returned from the enclosing function;
     - after growing, `ctx` is looked up again from `ctx_pos` (unless
       ctx_pos is -1), since the growth may have invalidated it.

   `state` and `ptr` are parenthesized so that any pointer expression /
   lvalue can be passed, not just a plain identifier. */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = (state)->data_stack_base; \
    TRACE(("allocating %s in %d (%d)\n", \
           SFY(type), alloc_pos, sizeof(type))); \
    if ((state)->data_stack_size < alloc_pos+sizeof(type)) { \
        int j = data_stack_grow(state, sizeof(type)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    (ptr) = (type*)((state)->data_stack+alloc_pos); \
    (state)->data_stack_base += sizeof(type); \
} while (0)
691
/* Point `ptr` at the object of the given `type` stored at byte offset
   `pos` in the data stack of `state`.  Nothing is copied or popped.
   All value parameters are parenthesized so compound expressions
   (e.g. a conditional offset) are handled correctly. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %d\n", SFY(type), (pos))); \
    (ptr) = (type*)((state)->data_stack+(pos)); \
} while (0)
697
/* Copy `size` bytes from `data` onto the top of the data stack of
   `state` and advance the stack top by `size`.

   If the stack is too small it is grown with data_stack_grow(); a
   negative result is returned from the enclosing function.  After
   growing, `ctx` is looked up again from `ctx_pos` (unless ctx_pos is
   -1), since the growth may have invalidated it.

   `size` is evaluated several times; it is parenthesized wherever it
   takes part in arithmetic so that the capacity test is correct for
   any expression. */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %d (%d)\n", \
           (data), (state)->data_stack_base, (size))); \
    if ((state)->data_stack_size < (state)->data_stack_base+(size)) { \
        int j = data_stack_grow(state, size); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    memcpy((state)->data_stack+(state)->data_stack_base, data, size); \
    (state)->data_stack_base += (size); \
} while (0)
711
/* Copy the topmost `size` bytes of the data stack of `state` into
   `data`.  If `discard` is non-zero the stack top is lowered by `size`
   afterwards; otherwise the bytes stay on the stack.

   `size` is parenthesized wherever it is subtracted so that the source
   offset is right for any expression (e.g. `a + b`). */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %d (%d)\n", \
           (data), (state)->data_stack_base-(size), (size))); \
    memcpy(data, (state)->data_stack+(state)->data_stack_base-(size), size); \
    if (discard) \
        (state)->data_stack_base -= (size); \
} while (0)
720
/* Drop the topmost `size` bytes of the data stack of `state` without
   copying them anywhere.  Parameters are parenthesized so that any
   pointer expression / size expression can be passed. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %d (%d)\n", \
           (state)->data_stack_base-(size), (size))); \
    (state)->data_stack_base -= (size); \
} while(0)
727
/* Convenience wrappers around the DATA_STACK_* macros that always act
   on the `state` variable in scope.  For the push/pop forms the
   argument is a pointer to the object; its size is taken from the
   pointed-to type. */
#define DATA_PUSH(item) \
    DATA_STACK_PUSH(state, (item), sizeof(*(item)))
#define DATA_POP(item) \
    DATA_STACK_POP(state, (item), sizeof(*(item)), 1)
#define DATA_POP_DISCARD(item) \
    DATA_STACK_POP_DISCARD(state, sizeof(*(item)))
#define DATA_ALLOC(type, out) \
    DATA_STACK_ALLOC(state, type, out)
#define DATA_LOOKUP_AT(type, out, offset) \
    DATA_STACK_LOOKUP_AT(state,type,out,offset)
738
/* Save / restore the group mark array (state->mark[0..lastmark]) on the
   data stack.  All four macros are no-ops unless `lastmark` is > 0.

   MARK_PUSH          push lastmark+1 mark pointers
   MARK_POP           copy them back into state->mark and drop them
   MARK_POP_KEEP      copy them back but leave them on the stack
   MARK_POP_DISCARD   drop them without copying

   `lastmark` is parenthesized in every use so that any expression can
   be passed, and each body is a proper do { ... } while (0) block. */
#define MARK_PUSH(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            /* copy first: the argument is usually ctx->lastmark, and */ \
            /* ctx may be re-looked-up if the push grows the stack */ \
            i = (lastmark); \
            DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
        } \
    } while (0)
#define MARK_POP(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP(state, state->mark, \
                           ((lastmark)+1)*sizeof(void*), 1); \
        } \
    } while (0)
#define MARK_POP_KEEP(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP(state, state->mark, \
                           ((lastmark)+1)*sizeof(void*), 0); \
        } \
    } while (0)
#define MARK_POP_DISCARD(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP_DISCARD(state, ((lastmark)+1)*sizeof(void*)); \
        } \
    } while (0)
756
/* Resume-point tags.  DO_JUMP() stores one of these in the `jump` field
   of the new match context; the outermost context uses JUMP_NONE. */
#define JUMP_NONE             0

/* greedy repeat (MAX_UNTIL) */
#define JUMP_MAX_UNTIL_1      1
#define JUMP_MAX_UNTIL_2      2
#define JUMP_MAX_UNTIL_3      3

/* non-greedy repeat (MIN_UNTIL) */
#define JUMP_MIN_UNTIL_1      4
#define JUMP_MIN_UNTIL_2      5
#define JUMP_MIN_UNTIL_3      6

/* REPEAT and the single-character repeat operators */
#define JUMP_REPEAT           7
#define JUMP_REPEAT_ONE_1     8
#define JUMP_REPEAT_ONE_2     9
#define JUMP_MIN_REPEAT_ONE   10

/* alternation and (presumably) the look-around assertions */
#define JUMP_BRANCH           11
#define JUMP_ASSERT           12
#define JUMP_ASSERT_NOT       13
771
/* Start matching `nextpattern` in a fresh match context without a C
   level recursive call.

   A new SRE_MATCH_CONTEXT is allocated on the data stack, linked to the
   current one through last_ctx_pos, tagged with `jumpvalue`, and made
   the current context; control then goes to the `entrance` label.
   `jumplabel` is defined right here so that the matcher can come back
   to this point later (presumably by dispatching on the saved `jump`
   tag -- that code is not part of this macro).

   CAUTION: this expands to several statements plus a label, so it must
   not be used as the sole body of an unbraced if/else/loop.  Invoke it
   as `DO_JUMP(...);` -- the trailing `while (0)` absorbs the semicolon
   and gives the label a statement to attach to. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
    nextctx->last_ctx_pos = ctx_pos; \
    nextctx->jump = (jumpvalue); \
    nextctx->pattern = (nextpattern); \
    ctx_pos = alloc_pos; \
    ctx = nextctx; \
    goto entrance; \
    jumplabel: \
    while (0) /* gcc doesn't like labels at end of scopes */
782
783typedef struct {
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000784 Py_ssize_t last_ctx_pos;
785 Py_ssize_t jump;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000786 SRE_CHAR* ptr;
787 SRE_CODE* pattern;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000788 Py_ssize_t count;
789 Py_ssize_t lastmark;
790 Py_ssize_t lastindex;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000791 union {
792 SRE_CODE chr;
793 SRE_REPEAT* rep;
794 } u;
795} SRE_MATCH_CONTEXT;
796
797/* check if string matches the given pattern. returns <0 for
798 error, 0 for failure, and 1 for success */
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000799LOCAL(Py_ssize_t)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000800SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
Guido van Rossumb700df92000-03-31 14:59:30 +0000801{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000802 SRE_CHAR* end = (SRE_CHAR *)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000803 Py_ssize_t alloc_pos, ctx_pos = -1;
804 Py_ssize_t i, ret = 0;
805 Py_ssize_t jump;
Christian Heimes2380ac72008-01-09 00:17:24 +0000806 unsigned int sigcount=0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000807
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000808 SRE_MATCH_CONTEXT* ctx;
809 SRE_MATCH_CONTEXT* nextctx;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000810
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000811 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000812
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000813 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
814 ctx->last_ctx_pos = -1;
815 ctx->jump = JUMP_NONE;
816 ctx->pattern = pattern;
817 ctx_pos = alloc_pos;
818
819entrance:
820
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000821 ctx->ptr = (SRE_CHAR *)state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000822
823 if (ctx->pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000824 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000825 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000826 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000827 TRACE(("reject (got %d chars, need %d)\n",
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000828 (end - ctx->ptr), ctx->pattern[3]));
829 RETURN_FAILURE;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000830 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000831 ctx->pattern += ctx->pattern[1] + 1;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000832 }
833
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000834 for (;;) {
Christian Heimes2380ac72008-01-09 00:17:24 +0000835 ++sigcount;
836 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
837 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
Guido van Rossumb700df92000-03-31 14:59:30 +0000838
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000839 switch (*ctx->pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000840
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000841 case SRE_OP_MARK:
842 /* set mark */
843 /* <MARK> <gid> */
844 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
845 ctx->ptr, ctx->pattern[0]));
846 i = ctx->pattern[0];
847 if (i & 1)
848 state->lastindex = i/2 + 1;
849 if (i > state->lastmark) {
850 /* state->lastmark is the highest valid index in the
851 state->mark array. If it is increased by more than 1,
852 the intervening marks must be set to NULL to signal
Tim Peters3d563502006-01-21 02:47:53 +0000853 that these marks have not been encountered. */
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000854 Py_ssize_t j = state->lastmark + 1;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000855 while (j < i)
856 state->mark[j++] = NULL;
857 state->lastmark = i;
858 }
859 state->mark[i] = ctx->ptr;
860 ctx->pattern++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000861 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000862
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000863 case SRE_OP_LITERAL:
864 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000865 /* <LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000866 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
867 ctx->ptr, *ctx->pattern));
868 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
869 RETURN_FAILURE;
870 ctx->pattern++;
871 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000872 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000873
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000874 case SRE_OP_NOT_LITERAL:
875 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000876 /* <NOT_LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000877 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
878 ctx->ptr, *ctx->pattern));
879 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
880 RETURN_FAILURE;
881 ctx->pattern++;
882 ctx->ptr++;
883 break;
884
885 case SRE_OP_SUCCESS:
886 /* end of pattern */
887 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
888 state->ptr = ctx->ptr;
889 RETURN_SUCCESS;
890
891 case SRE_OP_AT:
892 /* match at given position */
893 /* <AT> <code> */
894 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
895 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
896 RETURN_FAILURE;
897 ctx->pattern++;
898 break;
899
900 case SRE_OP_CATEGORY:
901 /* match at given category */
902 /* <CATEGORY> <code> */
903 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
904 ctx->ptr, *ctx->pattern));
905 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
906 RETURN_FAILURE;
907 ctx->pattern++;
908 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000909 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000910
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000911 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000912 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000913 /* <ANY> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000914 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
915 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
916 RETURN_FAILURE;
917 ctx->ptr++;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000918 break;
919
920 case SRE_OP_ANY_ALL:
921 /* match anything */
922 /* <ANY_ALL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000923 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
924 if (ctx->ptr >= end)
925 RETURN_FAILURE;
926 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000927 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000928
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000929 case SRE_OP_IN:
930 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000931 /* <IN> <skip> <set> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000932 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
933 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
934 RETURN_FAILURE;
935 ctx->pattern += ctx->pattern[0];
936 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000937 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000938
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000939 case SRE_OP_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000940 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
941 ctx->pattern, ctx->ptr, ctx->pattern[0]));
942 if (ctx->ptr >= end ||
943 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
944 RETURN_FAILURE;
945 ctx->pattern++;
946 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000947 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000948
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000949 case SRE_OP_NOT_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000950 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
951 ctx->pattern, ctx->ptr, *ctx->pattern));
952 if (ctx->ptr >= end ||
953 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
954 RETURN_FAILURE;
955 ctx->pattern++;
956 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000957 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000958
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000959 case SRE_OP_IN_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000960 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
961 if (ctx->ptr >= end
962 || !SRE_CHARSET(ctx->pattern+1,
963 (SRE_CODE)state->lower(*ctx->ptr)))
964 RETURN_FAILURE;
965 ctx->pattern += ctx->pattern[0];
966 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000967 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000968
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000969 case SRE_OP_JUMP:
970 case SRE_OP_INFO:
971 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000972 /* <JUMP> <offset> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000973 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
974 ctx->ptr, ctx->pattern[0]));
975 ctx->pattern += ctx->pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000976 break;
977
978 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000979 /* alternation */
980 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000981 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000982 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000983 ctx->u.rep = state->repeat;
984 if (ctx->u.rep)
985 MARK_PUSH(ctx->lastmark);
986 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
987 if (ctx->pattern[1] == SRE_OP_LITERAL &&
988 (ctx->ptr >= end ||
989 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000990 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000991 if (ctx->pattern[1] == SRE_OP_IN &&
992 (ctx->ptr >= end ||
993 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000994 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000995 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000996 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000997 if (ret) {
998 if (ctx->u.rep)
999 MARK_POP_DISCARD(ctx->lastmark);
1000 RETURN_ON_ERROR(ret);
1001 RETURN_SUCCESS;
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +00001002 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001003 if (ctx->u.rep)
1004 MARK_POP_KEEP(ctx->lastmark);
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001005 LASTMARK_RESTORE();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001006 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001007 if (ctx->u.rep)
1008 MARK_POP_DISCARD(ctx->lastmark);
1009 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001010
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001011 case SRE_OP_REPEAT_ONE:
1012 /* match repeated sequence (maximizing regexp) */
1013
1014 /* this operator only works if the repeated item is
1015 exactly one character wide, and we're not already
1016 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +00001017 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001018
1019 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1020
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001021 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1022 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001023
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001024 if (ctx->ptr + ctx->pattern[1] > end)
1025 RETURN_FAILURE; /* cannot match */
Fredrik Lundhe1869832000-08-01 22:47:49 +00001026
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001027 state->ptr = ctx->ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001028
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001029 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1030 RETURN_ON_ERROR(ret);
1031 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1032 ctx->count = ret;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001033 ctx->ptr += ctx->count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001034
1035 /* when we arrive here, count contains the number of
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001036 matches, and ctx->ptr points to the tail of the target
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001037 string. check if the rest of the pattern matches,
1038 and backtrack if not. */
1039
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001040 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001041 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001042
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001043 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001044 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001045 state->ptr = ctx->ptr;
1046 RETURN_SUCCESS;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001047 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001048
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001049 LASTMARK_SAVE();
1050
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001051 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001052 /* tail starts with a literal. skip positions where
1053 the rest of the pattern cannot possibly match */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001054 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001055 for (;;) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001056 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001057 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1058 ctx->ptr--;
1059 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001060 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001061 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001062 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001063 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001064 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1065 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001066 if (ret) {
1067 RETURN_ON_ERROR(ret);
1068 RETURN_SUCCESS;
1069 }
Tim Peters3d563502006-01-21 02:47:53 +00001070
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001071 LASTMARK_RESTORE();
Tim Peters3d563502006-01-21 02:47:53 +00001072
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001073 ctx->ptr--;
1074 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001075 }
1076
1077 } else {
1078 /* general case */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001079 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001080 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001081 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1082 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001083 if (ret) {
1084 RETURN_ON_ERROR(ret);
1085 RETURN_SUCCESS;
1086 }
1087 ctx->ptr--;
1088 ctx->count--;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001089 LASTMARK_RESTORE();
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001090 }
1091 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001092 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001093
Guido van Rossum41c99e72003-04-14 17:59:34 +00001094 case SRE_OP_MIN_REPEAT_ONE:
1095 /* match repeated sequence (minimizing regexp) */
1096
1097 /* this operator only works if the repeated item is
1098 exactly one character wide, and we're not already
1099 collecting backtracking points. for other cases,
1100 use the MIN_REPEAT operator */
1101
1102 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1103
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001104 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1105 ctx->pattern[1], ctx->pattern[2]));
Guido van Rossum41c99e72003-04-14 17:59:34 +00001106
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001107 if (ctx->ptr + ctx->pattern[1] > end)
1108 RETURN_FAILURE; /* cannot match */
Guido van Rossum41c99e72003-04-14 17:59:34 +00001109
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001110 state->ptr = ctx->ptr;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001111
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001112 if (ctx->pattern[1] == 0)
1113 ctx->count = 0;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001114 else {
1115 /* count using pattern min as the maximum */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001116 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1117 RETURN_ON_ERROR(ret);
1118 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001119 if (ret < (Py_ssize_t) ctx->pattern[1])
Tim Peters3d563502006-01-21 02:47:53 +00001120 /* didn't match minimum number of times */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001121 RETURN_FAILURE;
1122 /* advance past minimum matches of repeat */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001123 ctx->count = ret;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001124 ctx->ptr += ctx->count;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001125 }
1126
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001127 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Guido van Rossum41c99e72003-04-14 17:59:34 +00001128 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001129 state->ptr = ctx->ptr;
1130 RETURN_SUCCESS;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001131
1132 } else {
1133 /* general case */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001134 LASTMARK_SAVE();
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001135 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001136 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001137 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001138 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1139 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001140 if (ret) {
1141 RETURN_ON_ERROR(ret);
1142 RETURN_SUCCESS;
1143 }
1144 state->ptr = ctx->ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001145 ret = SRE_COUNT(state, ctx->pattern+3, 1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001146 RETURN_ON_ERROR(ret);
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001147 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001148 if (ret == 0)
Guido van Rossum41c99e72003-04-14 17:59:34 +00001149 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001150 assert(ret == 1);
1151 ctx->ptr++;
1152 ctx->count++;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001153 LASTMARK_RESTORE();
Guido van Rossum41c99e72003-04-14 17:59:34 +00001154 }
Guido van Rossum41c99e72003-04-14 17:59:34 +00001155 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001156 RETURN_FAILURE;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001157
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001158 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001159 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001160 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001161 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001162 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1163 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001164
1165 /* install new repeat context */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001166 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
Thomas Wouters89f507f2006-12-13 04:49:30 +00001167 if (!ctx->u.rep) {
1168 PyErr_NoMemory();
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001169 RETURN_FAILURE;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001170 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001171 ctx->u.rep->count = -1;
1172 ctx->u.rep->pattern = ctx->pattern;
1173 ctx->u.rep->prev = state->repeat;
1174 ctx->u.rep->last_ptr = NULL;
1175 state->repeat = ctx->u.rep;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001176
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001177 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001178 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001179 state->repeat = ctx->u.rep->prev;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001180 PyObject_FREE(ctx->u.rep);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001181
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001182 if (ret) {
1183 RETURN_ON_ERROR(ret);
1184 RETURN_SUCCESS;
1185 }
1186 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001187
1188 case SRE_OP_MAX_UNTIL:
1189 /* maximizing repeat */
1190 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1191
1192 /* FIXME: we probably need to deal with zero-width
1193 matches in here... */
1194
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001195 ctx->u.rep = state->repeat;
1196 if (!ctx->u.rep)
1197 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001198
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001199 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001200
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001201 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001202
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001203 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1204 ctx->ptr, ctx->count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001205
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001206 if (ctx->count < ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001207 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001208 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001209 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1210 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001211 if (ret) {
1212 RETURN_ON_ERROR(ret);
1213 RETURN_SUCCESS;
1214 }
1215 ctx->u.rep->count = ctx->count-1;
1216 state->ptr = ctx->ptr;
1217 RETURN_FAILURE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001218 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001219
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001220 if ((ctx->count < ctx->u.rep->pattern[2] ||
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001221 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001222 state->ptr != ctx->u.rep->last_ptr) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001223 /* we may have enough matches, but if we can
1224 match another item, do so */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001225 ctx->u.rep->count = ctx->count;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001226 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001227 MARK_PUSH(ctx->lastmark);
1228 /* zero-width match protection */
1229 DATA_PUSH(&ctx->u.rep->last_ptr);
1230 ctx->u.rep->last_ptr = state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001231 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1232 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001233 DATA_POP(&ctx->u.rep->last_ptr);
1234 if (ret) {
1235 MARK_POP_DISCARD(ctx->lastmark);
1236 RETURN_ON_ERROR(ret);
1237 RETURN_SUCCESS;
1238 }
1239 MARK_POP(ctx->lastmark);
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001240 LASTMARK_RESTORE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001241 ctx->u.rep->count = ctx->count-1;
1242 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001243 }
1244
1245 /* cannot match more repeated items here. make sure the
1246 tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001247 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001248 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001249 RETURN_ON_SUCCESS(ret);
1250 state->repeat = ctx->u.rep;
1251 state->ptr = ctx->ptr;
1252 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001253
1254 case SRE_OP_MIN_UNTIL:
1255 /* minimizing repeat */
1256 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1257
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001258 ctx->u.rep = state->repeat;
1259 if (!ctx->u.rep)
1260 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001261
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001262 state->ptr = ctx->ptr;
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001263
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001264 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001265
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001266 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1267 ctx->ptr, ctx->count, ctx->u.rep->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001268
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001269 if (ctx->count < ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001270 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001271 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001272 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1273 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001274 if (ret) {
1275 RETURN_ON_ERROR(ret);
1276 RETURN_SUCCESS;
1277 }
1278 ctx->u.rep->count = ctx->count-1;
1279 state->ptr = ctx->ptr;
1280 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001281 }
1282
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001283 LASTMARK_SAVE();
1284
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001285 /* see if the tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001286 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001287 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001288 if (ret) {
1289 RETURN_ON_ERROR(ret);
1290 RETURN_SUCCESS;
1291 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001292
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001293 state->repeat = ctx->u.rep;
1294 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001295
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001296 LASTMARK_RESTORE();
1297
Serhiy Storchakafa468162013-02-16 21:23:53 +02001298 if ((ctx->count >= ctx->u.rep->pattern[2]
1299 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1300 state->ptr == ctx->u.rep->last_ptr)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001301 RETURN_FAILURE;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001302
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001303 ctx->u.rep->count = ctx->count;
Serhiy Storchakafa468162013-02-16 21:23:53 +02001304 /* zero-width match protection */
1305 DATA_PUSH(&ctx->u.rep->last_ptr);
1306 ctx->u.rep->last_ptr = state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001307 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1308 ctx->u.rep->pattern+3);
Serhiy Storchakafa468162013-02-16 21:23:53 +02001309 DATA_POP(&ctx->u.rep->last_ptr);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001310 if (ret) {
1311 RETURN_ON_ERROR(ret);
1312 RETURN_SUCCESS;
1313 }
1314 ctx->u.rep->count = ctx->count-1;
1315 state->ptr = ctx->ptr;
1316 RETURN_FAILURE;
1317
1318 case SRE_OP_GROUPREF:
1319 /* match backreference */
1320 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1321 ctx->ptr, ctx->pattern[0]));
1322 i = ctx->pattern[0];
1323 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001324 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001325 if (groupref >= state->lastmark) {
1326 RETURN_FAILURE;
1327 } else {
1328 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1329 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1330 if (!p || !e || e < p)
1331 RETURN_FAILURE;
1332 while (p < e) {
1333 if (ctx->ptr >= end || *ctx->ptr != *p)
1334 RETURN_FAILURE;
1335 p++; ctx->ptr++;
1336 }
1337 }
1338 }
1339 ctx->pattern++;
1340 break;
1341
1342 case SRE_OP_GROUPREF_IGNORE:
1343 /* match backreference */
1344 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1345 ctx->ptr, ctx->pattern[0]));
1346 i = ctx->pattern[0];
1347 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001348 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001349 if (groupref >= state->lastmark) {
1350 RETURN_FAILURE;
1351 } else {
1352 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1353 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1354 if (!p || !e || e < p)
1355 RETURN_FAILURE;
1356 while (p < e) {
1357 if (ctx->ptr >= end ||
1358 state->lower(*ctx->ptr) != state->lower(*p))
1359 RETURN_FAILURE;
1360 p++; ctx->ptr++;
1361 }
1362 }
1363 }
1364 ctx->pattern++;
1365 break;
1366
1367 case SRE_OP_GROUPREF_EXISTS:
1368 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1369 ctx->ptr, ctx->pattern[0]));
1370 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1371 i = ctx->pattern[0];
1372 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001373 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001374 if (groupref >= state->lastmark) {
1375 ctx->pattern += ctx->pattern[1];
1376 break;
1377 } else {
1378 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1379 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1380 if (!p || !e || e < p) {
1381 ctx->pattern += ctx->pattern[1];
1382 break;
1383 }
1384 }
1385 }
1386 ctx->pattern += 2;
1387 break;
1388
1389 case SRE_OP_ASSERT:
1390 /* assert subpattern */
1391 /* <ASSERT> <skip> <back> <pattern> */
1392 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1393 ctx->ptr, ctx->pattern[1]));
1394 state->ptr = ctx->ptr - ctx->pattern[1];
1395 if (state->ptr < state->beginning)
1396 RETURN_FAILURE;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001397 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001398 RETURN_ON_FAILURE(ret);
1399 ctx->pattern += ctx->pattern[0];
1400 break;
1401
1402 case SRE_OP_ASSERT_NOT:
1403 /* assert not subpattern */
1404 /* <ASSERT_NOT> <skip> <back> <pattern> */
1405 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1406 ctx->ptr, ctx->pattern[1]));
1407 state->ptr = ctx->ptr - ctx->pattern[1];
1408 if (state->ptr >= state->beginning) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001409 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001410 if (ret) {
1411 RETURN_ON_ERROR(ret);
1412 RETURN_FAILURE;
1413 }
1414 }
1415 ctx->pattern += ctx->pattern[0];
1416 break;
1417
1418 case SRE_OP_FAILURE:
1419 /* immediate failure */
1420 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1421 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001422
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001423 default:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001424 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1425 ctx->pattern[-1]));
1426 RETURN_ERROR(SRE_ERROR_ILLEGAL);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001427 }
1428 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001429
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001430exit:
1431 ctx_pos = ctx->last_ctx_pos;
1432 jump = ctx->jump;
1433 DATA_POP_DISCARD(ctx);
1434 if (ctx_pos == -1)
1435 return ret;
1436 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1437
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001438 switch (jump) {
1439 case JUMP_MAX_UNTIL_2:
1440 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1441 goto jump_max_until_2;
1442 case JUMP_MAX_UNTIL_3:
1443 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1444 goto jump_max_until_3;
1445 case JUMP_MIN_UNTIL_2:
1446 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1447 goto jump_min_until_2;
1448 case JUMP_MIN_UNTIL_3:
1449 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1450 goto jump_min_until_3;
1451 case JUMP_BRANCH:
1452 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1453 goto jump_branch;
1454 case JUMP_MAX_UNTIL_1:
1455 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1456 goto jump_max_until_1;
1457 case JUMP_MIN_UNTIL_1:
1458 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1459 goto jump_min_until_1;
1460 case JUMP_REPEAT:
1461 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1462 goto jump_repeat;
1463 case JUMP_REPEAT_ONE_1:
1464 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1465 goto jump_repeat_one_1;
1466 case JUMP_REPEAT_ONE_2:
1467 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1468 goto jump_repeat_one_2;
1469 case JUMP_MIN_REPEAT_ONE:
1470 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1471 goto jump_min_repeat_one;
1472 case JUMP_ASSERT:
1473 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1474 goto jump_assert;
1475 case JUMP_ASSERT_NOT:
1476 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1477 goto jump_assert_not;
1478 case JUMP_NONE:
1479 TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
1480 break;
1481 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001482
1483 return ret; /* should never get here */
Guido van Rossumb700df92000-03-31 14:59:30 +00001484}
1485
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001486LOCAL(Py_ssize_t)
Guido van Rossumb700df92000-03-31 14:59:30 +00001487SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1488{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001489 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1490 SRE_CHAR* end = (SRE_CHAR *)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001491 Py_ssize_t status = 0;
1492 Py_ssize_t prefix_len = 0;
1493 Py_ssize_t prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001494 SRE_CODE* prefix = NULL;
1495 SRE_CODE* charset = NULL;
1496 SRE_CODE* overlap = NULL;
1497 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001498
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001499 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001500 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001501 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001502
1503 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001504
Gustavo Niemeyer28b5bb32003-06-26 14:41:08 +00001505 if (pattern[3] > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001506 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001507 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001508 end -= pattern[3]-1;
1509 if (end <= ptr)
1510 end = ptr+1;
1511 }
1512
Fredrik Lundh3562f112000-07-02 12:00:07 +00001513 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001514 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001515 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001516 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001517 prefix_skip = pattern[6];
1518 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001519 overlap = prefix + prefix_len - 1;
1520 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001521 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001522 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001523 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001524
1525 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001526 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001527
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001528 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1529 TRACE(("charset = %p\n", charset));
1530
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001531#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001532 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001533 /* pattern starts with a known prefix. use the overlap
1534 table to skip forward as fast as we possibly can */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001535 Py_ssize_t i = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001536 end = (SRE_CHAR *)state->end;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001537 while (ptr < end) {
1538 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001539 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001540 if (!i)
1541 break;
1542 else
1543 i = overlap[i];
1544 } else {
1545 if (++i == prefix_len) {
1546 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001547 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1548 state->start = ptr + 1 - prefix_len;
1549 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001550 if (flags & SRE_INFO_LITERAL)
1551 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001552 status = SRE_MATCH(state, pattern + 2*prefix_skip);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001553 if (status != 0)
1554 return status;
1555 /* close but no cigar -- try again */
1556 i = overlap[i];
1557 }
1558 break;
1559 }
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001560 }
1561 ptr++;
1562 }
1563 return 0;
1564 }
1565#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001566
Fredrik Lundh3562f112000-07-02 12:00:07 +00001567 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001568 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001569 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001570 SRE_CODE chr = pattern[1];
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001571 end = (SRE_CHAR *)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 for (;;) {
1573 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1574 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001575 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001576 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001577 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001578 state->start = ptr;
1579 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001580 if (flags & SRE_INFO_LITERAL)
1581 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001582 status = SRE_MATCH(state, pattern + 2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001583 if (status != 0)
1584 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001585 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001586 } else if (charset) {
1587 /* pattern starts with a character from a known set */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001588 end = (SRE_CHAR *)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001589 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001590 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001591 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001592 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001593 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001594 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001595 state->start = ptr;
1596 state->ptr = ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001597 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001598 if (status != 0)
1599 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001600 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001601 }
1602 } else
1603 /* general case */
1604 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001605 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001606 state->start = state->ptr = ptr++;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001607 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001608 if (status != 0)
1609 break;
1610 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001611
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001612 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001613}
Tim Peters3d563502006-01-21 02:47:53 +00001614
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001615LOCAL(int)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001616SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001617{
1618 /* check if given string is a literal template (i.e. no escapes) */
1619 while (len-- > 0)
1620 if (*ptr++ == '\\')
1621 return 0;
1622 return 1;
1623}
Guido van Rossumb700df92000-03-31 14:59:30 +00001624
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001625#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001626
1627/* -------------------------------------------------------------------- */
1628/* factories and destructors */
1629
1630/* see sre.h for object declarations */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001631static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1632static PyObject*pattern_scanner(PatternObject*, PyObject*);
Guido van Rossumb700df92000-03-31 14:59:30 +00001633
1634static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001635sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +00001636{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001637 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001638}
1639
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001640static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001641sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001642{
1643 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001644 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001645 return NULL;
1646 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001647 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001648 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001649#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001650 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001651#else
1652 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001653#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001654 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001655}
1656
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001657LOCAL(void)
1658state_reset(SRE_STATE* state)
1659{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001660 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001661 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001662
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001663 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001664 state->lastindex = -1;
1665
1666 state->repeat = NULL;
1667
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001668 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001669}
1670
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001671static void*
Benjamin Petersone48944b2012-03-07 14:50:25 -06001672getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize, Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +00001673{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001674 /* given a python object, return a data pointer, a length (in
1675 characters), and a character size. return NULL if the object
1676 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +00001677
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001678 PyBufferProcs *buffer;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001679 Py_ssize_t size, bytes;
1680 int charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001681 void* ptr;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001682
Alexandre Vassalotti70a23712007-10-14 02:05:51 +00001683 /* Unicode objects do not support the buffer API. So, get the data
1684 directly instead. */
1685 if (PyUnicode_Check(string)) {
1686 ptr = (void *)PyUnicode_AS_DATA(string);
1687 *p_length = PyUnicode_GET_SIZE(string);
1688 *p_charsize = sizeof(Py_UNICODE);
1689 return ptr;
1690 }
1691
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001692 /* get pointer to string buffer */
Benjamin Petersone48944b2012-03-07 14:50:25 -06001693 view->len = -1;
Christian Heimes90aa7642007-12-19 02:45:37 +00001694 buffer = Py_TYPE(string)->tp_as_buffer;
Antoine Pitroufd036452008-08-19 17:56:33 +00001695 if (!buffer || !buffer->bf_getbuffer ||
Benjamin Petersone48944b2012-03-07 14:50:25 -06001696 (*buffer->bf_getbuffer)(string, view, PyBUF_SIMPLE) < 0) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001697 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1698 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001699 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001700
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001701 /* determine buffer size */
Benjamin Petersone48944b2012-03-07 14:50:25 -06001702 bytes = view->len;
1703 ptr = view->buf;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001704
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001705 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001706 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001707 goto err;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001708 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001709
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001710 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001711 size = PyObject_Size(string);
Guido van Rossumb700df92000-03-31 14:59:30 +00001712
Christian Heimes72b710a2008-05-26 13:28:38 +00001713 if (PyBytes_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001714 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001715#if defined(HAVE_UNICODE)
Antoine Pitroufd036452008-08-19 17:56:33 +00001716 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001717 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001718#endif
1719 else {
1720 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001721 goto err;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001722 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001723
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001724 *p_length = size;
1725 *p_charsize = charsize;
1726
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001727 if (ptr == NULL) {
Antoine Pitroufd036452008-08-19 17:56:33 +00001728 PyErr_SetString(PyExc_ValueError,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001729 "Buffer is NULL");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001730 goto err;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001731 }
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001732 return ptr;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001733 err:
1734 PyBuffer_Release(view);
1735 view->buf = NULL;
1736 return NULL;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001737}
1738
1739LOCAL(PyObject*)
1740state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001741 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001742{
1743 /* prepare state object */
1744
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001745 Py_ssize_t length;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001746 int charsize;
1747 void* ptr;
1748
1749 memset(state, 0, sizeof(SRE_STATE));
1750
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001751 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001752 state->lastindex = -1;
1753
Benjamin Petersone48944b2012-03-07 14:50:25 -06001754 state->buffer.buf = NULL;
1755 ptr = getstring(string, &length, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001756 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -06001757 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001758
Benjamin Petersone48944b2012-03-07 14:50:25 -06001759 if (charsize == 1 && pattern->charsize > 1) {
1760 PyErr_SetString(PyExc_TypeError,
Antoine Pitroufd036452008-08-19 17:56:33 +00001761 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001762 goto err;
1763 }
1764 if (charsize > 1 && pattern->charsize == 1) {
1765 PyErr_SetString(PyExc_TypeError,
Antoine Pitroufd036452008-08-19 17:56:33 +00001766 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001767 goto err;
1768 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001769
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001770 /* adjust boundaries */
1771 if (start < 0)
1772 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001773 else if (start > length)
1774 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001775
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001776 if (end < 0)
1777 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001778 else if (end > length)
1779 end = length;
1780
1781 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001782
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001783 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001784
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001785 state->start = (void*) ((char*) ptr + start * state->charsize);
1786 state->end = (void*) ((char*) ptr + end * state->charsize);
1787
1788 Py_INCREF(string);
1789 state->string = string;
1790 state->pos = start;
1791 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001792
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001793 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001794 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001795 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001796#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001797 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001798#else
1799 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001800#endif
1801 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001802 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001803
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001804 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001805 err:
1806 if (state->buffer.buf)
1807 PyBuffer_Release(&state->buffer);
1808 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001809}
1810
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001811LOCAL(void)
1812state_fini(SRE_STATE* state)
1813{
Benjamin Petersone48944b2012-03-07 14:50:25 -06001814 if (state->buffer.buf)
1815 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001816 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001817 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001818}
1819
/* calculate offset from start of string: the byte distance between
   `member` and state->beginning, divided by the character size */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1823
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001824LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001825state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001826{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001827 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +00001828
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001829 index = (index - 1) * 2;
1830
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001831 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001832 if (empty)
1833 /* want empty string */
1834 i = j = 0;
1835 else {
1836 Py_INCREF(Py_None);
1837 return Py_None;
1838 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001839 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001840 i = STATE_OFFSET(state, state->mark[index]);
1841 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001842 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001843
Fredrik Lundh58100642000-08-09 09:14:35 +00001844 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001845}
1846
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001847static void
1848pattern_error(int status)
1849{
1850 switch (status) {
1851 case SRE_ERROR_RECURSION_LIMIT:
1852 PyErr_SetString(
1853 PyExc_RuntimeError,
1854 "maximum recursion limit exceeded"
1855 );
1856 break;
1857 case SRE_ERROR_MEMORY:
1858 PyErr_NoMemory();
1859 break;
Christian Heimes2380ac72008-01-09 00:17:24 +00001860 case SRE_ERROR_INTERRUPTED:
1861 /* An exception has already been raised, so let it fly */
1862 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001863 default:
1864 /* other error codes indicate compiler/engine bugs */
1865 PyErr_SetString(
1866 PyExc_RuntimeError,
1867 "internal error in regular expression engine"
1868 );
1869 }
1870}
1871
Guido van Rossumb700df92000-03-31 14:59:30 +00001872static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001873pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001874{
Raymond Hettinger027bb632004-05-31 03:09:25 +00001875 if (self->weakreflist != NULL)
1876 PyObject_ClearWeakRefs((PyObject *) self);
Benjamin Petersone48944b2012-03-07 14:50:25 -06001877 if (self->view.buf)
1878 PyBuffer_Release(&self->view);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001879 Py_XDECREF(self->pattern);
1880 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001881 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001882 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001883}
1884
1885static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001886pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001887{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001888 SRE_STATE state;
1889 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001890
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001891 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001892 Py_ssize_t start = 0;
1893 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001894 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001895 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001896 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001897 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001898
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001899 string = state_init(&state, self, string, start, end);
1900 if (!string)
1901 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001902
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001903 state.ptr = state.start;
1904
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001905 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1906
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001907 if (state.charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001908 status = sre_match(&state, PatternObject_GetCode(self));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001909 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001910#if defined(HAVE_UNICODE)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001911 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001912#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001913 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001914
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001915 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +00001916 if (PyErr_Occurred())
1917 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001918
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001919 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001920
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001921 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001922}
1923
1924static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001925pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001926{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001927 SRE_STATE state;
1928 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001929
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001930 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001931 Py_ssize_t start = 0;
1932 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001933 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001934 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001935 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001936 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001937
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001938 string = state_init(&state, self, string, start, end);
1939 if (!string)
1940 return NULL;
1941
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001942 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1943
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001944 if (state.charsize == 1) {
1945 status = sre_search(&state, PatternObject_GetCode(self));
1946 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001947#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001948 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001949#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001950 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001951
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001952 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1953
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001954 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001955
Thomas Wouters89f507f2006-12-13 04:49:30 +00001956 if (PyErr_Occurred())
1957 return NULL;
1958
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001959 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001960}
1961
1962static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001963call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001964{
1965 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001966 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001967 PyObject* func;
1968 PyObject* result;
1969
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001970 if (!args)
1971 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +00001972 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001973 if (!name)
1974 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001975 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001976 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001977 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001978 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001979 func = PyObject_GetAttrString(mod, function);
1980 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001981 if (!func)
1982 return NULL;
1983 result = PyObject_CallObject(func, args);
1984 Py_DECREF(func);
1985 Py_DECREF(args);
1986 return result;
1987}
1988
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
   Returns 1 on success.  Returns 0 and leaves *object untouched when the
   call fails. */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* original = *object;
    PyObject* duplicate;

    duplicate = call("copy", "deepcopy", PyTuple_Pack(2, original, memo));
    if (duplicate == NULL)
        return 0;

    /* drop the old value, then install the copy in its place */
    Py_DECREF(*object);
    *object = duplicate;

    return 1; /* success */
}
#endif
2008
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002009static PyObject*
Thomas Wouters1b7f8912007-09-19 03:06:30 +00002010join_list(PyObject* list, PyObject* string)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002011{
2012 /* join list elements */
2013
2014 PyObject* joiner;
2015#if PY_VERSION_HEX >= 0x01060000
2016 PyObject* function;
2017 PyObject* args;
2018#endif
2019 PyObject* result;
2020
Thomas Wouters1b7f8912007-09-19 03:06:30 +00002021 joiner = PySequence_GetSlice(string, 0, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002022 if (!joiner)
2023 return NULL;
2024
Thomas Wouters1b7f8912007-09-19 03:06:30 +00002025 if (PyList_GET_SIZE(list) == 0) {
2026 Py_DECREF(list);
2027 return joiner;
2028 }
2029
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002030#if PY_VERSION_HEX >= 0x01060000
2031 function = PyObject_GetAttrString(joiner, "join");
2032 if (!function) {
2033 Py_DECREF(joiner);
2034 return NULL;
2035 }
2036 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002037 if (!args) {
2038 Py_DECREF(function);
2039 Py_DECREF(joiner);
2040 return NULL;
2041 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002042 PyTuple_SET_ITEM(args, 0, list);
2043 result = PyObject_CallObject(function, args);
2044 Py_DECREF(args); /* also removes list */
2045 Py_DECREF(function);
2046#else
2047 result = call(
2048 "string", "join",
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002049 PyTuple_Pack(2, list, joiner)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002050 );
2051#endif
2052 Py_DECREF(joiner);
2053
2054 return result;
2055}
2056
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002057static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002058pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002059{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 SRE_STATE state;
2061 PyObject* list;
2062 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002063 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00002064
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002065 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002066 Py_ssize_t start = 0;
2067 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002068 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002069 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00002070 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002071 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002072
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002073 string = state_init(&state, self, string, start, end);
2074 if (!string)
2075 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002076
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002077 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002078 if (!list) {
2079 state_fini(&state);
2080 return NULL;
2081 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002082
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002083 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002084
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002085 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +00002086
Fredrik Lundhebc37b22000-10-28 19:30:41 +00002087 state_reset(&state);
2088
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002089 state.ptr = state.start;
2090
2091 if (state.charsize == 1) {
2092 status = sre_search(&state, PatternObject_GetCode(self));
2093 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002094#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002096#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002097 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002098
Thomas Wouters89f507f2006-12-13 04:49:30 +00002099 if (PyErr_Occurred())
2100 goto error;
2101
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002102 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002103 if (status == 0)
2104 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00002105 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002106 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002107 }
Tim Peters3d563502006-01-21 02:47:53 +00002108
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002109 /* don't bother to build a match object */
2110 switch (self->groups) {
2111 case 0:
2112 b = STATE_OFFSET(&state, state.start);
2113 e = STATE_OFFSET(&state, state.ptr);
2114 item = PySequence_GetSlice(string, b, e);
2115 if (!item)
2116 goto error;
2117 break;
2118 case 1:
2119 item = state_getslice(&state, 1, string, 1);
2120 if (!item)
2121 goto error;
2122 break;
2123 default:
2124 item = PyTuple_New(self->groups);
2125 if (!item)
2126 goto error;
2127 for (i = 0; i < self->groups; i++) {
2128 PyObject* o = state_getslice(&state, i+1, string, 1);
2129 if (!o) {
2130 Py_DECREF(item);
2131 goto error;
2132 }
2133 PyTuple_SET_ITEM(item, i, o);
2134 }
2135 break;
2136 }
2137
2138 status = PyList_Append(list, item);
2139 Py_DECREF(item);
2140 if (status < 0)
2141 goto error;
2142
2143 if (state.ptr == state.start)
2144 state.start = (void*) ((char*) state.ptr + state.charsize);
2145 else
2146 state.start = state.ptr;
2147
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002148 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002149
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 state_fini(&state);
2151 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00002152
2153error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002154 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002155 state_fini(&state);
2156 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002157
Guido van Rossumb700df92000-03-31 14:59:30 +00002158}
2159
#if PY_VERSION_HEX >= 0x02020000
/* finditer(): create a scanner for the given arguments and return a
   callable-iterator over its "search" method, using None as the sentinel
   that ends iteration. */
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    PyObject* scanner;
    PyObject* search_method;
    PyObject* result;

    scanner = pattern_scanner(pattern, args);
    if (scanner == NULL)
        return NULL;

    /* only the bound method is needed from here on */
    search_method = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);
    if (search_method == NULL)
        return NULL;

    result = PyCallIter_New(search_method, Py_None);
    Py_DECREF(search_method);

    return result;
}
#endif
2183
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002184static PyObject*
2185pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2186{
2187 SRE_STATE state;
2188 PyObject* list;
2189 PyObject* item;
2190 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002191 Py_ssize_t n;
2192 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002193 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002194
2195 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002196 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002197 static char* kwlist[] = { "source", "maxsplit", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002198 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002199 &string, &maxsplit))
2200 return NULL;
2201
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002202 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002203 if (!string)
2204 return NULL;
2205
2206 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002207 if (!list) {
2208 state_fini(&state);
2209 return NULL;
2210 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002211
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002212 n = 0;
2213 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002214
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002215 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002216
2217 state_reset(&state);
2218
2219 state.ptr = state.start;
2220
2221 if (state.charsize == 1) {
2222 status = sre_search(&state, PatternObject_GetCode(self));
2223 } else {
2224#if defined(HAVE_UNICODE)
2225 status = sre_usearch(&state, PatternObject_GetCode(self));
2226#endif
2227 }
2228
Thomas Wouters89f507f2006-12-13 04:49:30 +00002229 if (PyErr_Occurred())
2230 goto error;
2231
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002232 if (status <= 0) {
2233 if (status == 0)
2234 break;
2235 pattern_error(status);
2236 goto error;
2237 }
Tim Peters3d563502006-01-21 02:47:53 +00002238
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002239 if (state.start == state.ptr) {
2240 if (last == state.end)
2241 break;
2242 /* skip one character */
2243 state.start = (void*) ((char*) state.ptr + state.charsize);
2244 continue;
2245 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002246
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002247 /* get segment before this match */
2248 item = PySequence_GetSlice(
2249 string, STATE_OFFSET(&state, last),
2250 STATE_OFFSET(&state, state.start)
2251 );
2252 if (!item)
2253 goto error;
2254 status = PyList_Append(list, item);
2255 Py_DECREF(item);
2256 if (status < 0)
2257 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002258
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002259 /* add groups (if any) */
2260 for (i = 0; i < self->groups; i++) {
2261 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002262 if (!item)
2263 goto error;
2264 status = PyList_Append(list, item);
2265 Py_DECREF(item);
2266 if (status < 0)
2267 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002268 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002269
2270 n = n + 1;
2271
2272 last = state.start = state.ptr;
2273
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002274 }
2275
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002276 /* get segment following last match (even if empty) */
2277 item = PySequence_GetSlice(
2278 string, STATE_OFFSET(&state, last), state.endpos
2279 );
2280 if (!item)
2281 goto error;
2282 status = PyList_Append(list, item);
2283 Py_DECREF(item);
2284 if (status < 0)
2285 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002286
2287 state_fini(&state);
2288 return list;
2289
2290error:
2291 Py_DECREF(list);
2292 state_fini(&state);
2293 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002294
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002295}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002296
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002297static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002298pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002299 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002300{
2301 SRE_STATE state;
2302 PyObject* list;
2303 PyObject* item;
2304 PyObject* filter;
2305 PyObject* args;
2306 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002307 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002308 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002309 Py_ssize_t n;
2310 Py_ssize_t i, b, e;
2311 int bint;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002312 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06002313 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002314
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002315 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002316 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002317 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002318 Py_INCREF(filter);
2319 filter_is_callable = 1;
2320 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002321 /* if not callable, check if it's a literal string */
2322 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06002323 view.buf = NULL;
2324 ptr = getstring(ptemplate, &n, &bint, &view);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002325 b = bint;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002326 if (ptr) {
2327 if (b == 1) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002328 literal = sre_literal_template((unsigned char *)ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002329 } else {
2330#if defined(HAVE_UNICODE)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002331 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002332#endif
2333 }
2334 } else {
2335 PyErr_Clear();
2336 literal = 0;
2337 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06002338 if (view.buf)
2339 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002340 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002341 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002342 Py_INCREF(filter);
2343 filter_is_callable = 0;
2344 } else {
2345 /* not a literal; hand it over to the template compiler */
2346 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002347 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002348 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002349 );
2350 if (!filter)
2351 return NULL;
2352 filter_is_callable = PyCallable_Check(filter);
2353 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002354 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002355
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002356 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002357 if (!string) {
2358 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002359 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002360 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002361
2362 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002363 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002364 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002365 state_fini(&state);
2366 return NULL;
2367 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002368
2369 n = i = 0;
2370
2371 while (!count || n < count) {
2372
2373 state_reset(&state);
2374
2375 state.ptr = state.start;
2376
2377 if (state.charsize == 1) {
2378 status = sre_search(&state, PatternObject_GetCode(self));
2379 } else {
2380#if defined(HAVE_UNICODE)
2381 status = sre_usearch(&state, PatternObject_GetCode(self));
2382#endif
2383 }
2384
Thomas Wouters89f507f2006-12-13 04:49:30 +00002385 if (PyErr_Occurred())
2386 goto error;
2387
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002388 if (status <= 0) {
2389 if (status == 0)
2390 break;
2391 pattern_error(status);
2392 goto error;
2393 }
Tim Peters3d563502006-01-21 02:47:53 +00002394
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002395 b = STATE_OFFSET(&state, state.start);
2396 e = STATE_OFFSET(&state, state.ptr);
2397
2398 if (i < b) {
2399 /* get segment before this match */
2400 item = PySequence_GetSlice(string, i, b);
2401 if (!item)
2402 goto error;
2403 status = PyList_Append(list, item);
2404 Py_DECREF(item);
2405 if (status < 0)
2406 goto error;
2407
2408 } else if (i == b && i == e && n > 0)
2409 /* ignore empty match on latest position */
2410 goto next;
2411
2412 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002413 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002414 match = pattern_new_match(self, &state, 1);
2415 if (!match)
2416 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002417 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002418 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002419 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002420 goto error;
2421 }
2422 item = PyObject_CallObject(filter, args);
2423 Py_DECREF(args);
2424 Py_DECREF(match);
2425 if (!item)
2426 goto error;
2427 } else {
2428 /* filter is literal string */
2429 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002430 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002431 }
2432
2433 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002434 if (item != Py_None) {
2435 status = PyList_Append(list, item);
2436 Py_DECREF(item);
2437 if (status < 0)
2438 goto error;
2439 }
Tim Peters3d563502006-01-21 02:47:53 +00002440
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002441 i = e;
2442 n = n + 1;
2443
2444next:
2445 /* move on */
2446 if (state.ptr == state.start)
2447 state.start = (void*) ((char*) state.ptr + state.charsize);
2448 else
2449 state.start = state.ptr;
2450
2451 }
2452
2453 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002454 if (i < state.endpos) {
2455 item = PySequence_GetSlice(string, i, state.endpos);
2456 if (!item)
2457 goto error;
2458 status = PyList_Append(list, item);
2459 Py_DECREF(item);
2460 if (status < 0)
2461 goto error;
2462 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002463
2464 state_fini(&state);
2465
Guido van Rossum4e173842001-12-07 04:25:10 +00002466 Py_DECREF(filter);
2467
Fredrik Lundhdac58492001-10-21 21:48:30 +00002468 /* convert list to single string (also removes list) */
Thomas Wouters1b7f8912007-09-19 03:06:30 +00002469 item = join_list(list, string);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002470
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002471 if (!item)
2472 return NULL;
2473
2474 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002475 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002476
2477 return item;
2478
2479error:
2480 Py_DECREF(list);
2481 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002482 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002483 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002484
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002485}
2486
2487static PyObject*
2488pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2489{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002490 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002491 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002492 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002493 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002494 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002495 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002496 return NULL;
2497
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002498 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002499}
2500
2501static PyObject*
2502pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2503{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002504 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002505 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002506 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002507 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002508 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002509 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002510 return NULL;
2511
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002512 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002513}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002514
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002515static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002516pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002517{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002518#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002519 PatternObject* copy;
2520 int offset;
2521
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002522 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2523 if (!copy)
2524 return NULL;
2525
2526 offset = offsetof(PatternObject, groups);
2527
2528 Py_XINCREF(self->groupindex);
2529 Py_XINCREF(self->indexgroup);
2530 Py_XINCREF(self->pattern);
2531
2532 memcpy((char*) copy + offset, (char*) self + offset,
2533 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00002534 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002535
2536 return (PyObject*) copy;
2537#else
2538 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2539 return NULL;
2540#endif
2541}
2542
2543static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002544pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002545{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002546#ifdef USE_BUILTIN_COPY
2547 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002548
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002549 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002550 if (!copy)
2551 return NULL;
2552
2553 if (!deepcopy(&copy->groupindex, memo) ||
2554 !deepcopy(&copy->indexgroup, memo) ||
2555 !deepcopy(&copy->pattern, memo)) {
2556 Py_DECREF(copy);
2557 return NULL;
2558 }
2559
2560#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002561 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2562 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002563#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002564}
2565
/* Docstrings for the pattern methods and for the pattern type itself.
   They are referenced by name from pattern_methods and Pattern_Type. */
Raymond Hettinger94478742004-09-24 04:31:19 +00002566PyDoc_STRVAR(pattern_match_doc,
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002567"match(string[, pos[, endpos]]) -> match object or None.\n\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00002568 Matches zero or more characters at the beginning of the string");
2569
2570PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002571"search(string[, pos[, endpos]]) -> match object or None.\n\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00002572 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02002573 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002574
2575PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002576"split(string[, maxsplit = 0]) -> list.\n\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00002577 Split string by the occurrences of pattern.");
2578
2579PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002580"findall(string[, pos[, endpos]]) -> list.\n\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00002581 Return a list of all non-overlapping matches of pattern in string.");
2582
2583PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002584"finditer(string[, pos[, endpos]]) -> iterator.\n\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00002585 Return an iterator over all non-overlapping matches for the \n\
2586 RE pattern in string. For each match, the iterator returns a\n\
2587 match object.");
2588
2589PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002590"sub(repl, string[, count = 0]) -> newstring.\n\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00002591 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00002592 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002593
2594PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002595"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00002596 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2597 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00002598 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002599
2600PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2601
/* Method table for pattern objects; Pattern_Type installs it as
   tp_methods.  The scanner, __copy__ and __deepcopy__ entries supply no
   docstring field.  The {NULL, NULL} entry terminates the table. */
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002602static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00002603 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002604 pattern_match_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002605 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002606 pattern_search_doc},
2607 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
2608 pattern_sub_doc},
2609 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
2610 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002611 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002612 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002613 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002614 pattern_findall_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002615#if PY_VERSION_HEX >= 0x02020000
Raymond Hettinger94478742004-09-24 04:31:19 +00002616 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
2617 pattern_finditer_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002618#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002619 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002620 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2621 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002622 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002623};
2624
/* Read-only attributes served directly from PatternObject fields;
   PAT_OFF(x) is the byte offset of field x inside PatternObject.
   Pattern_Type installs this table as tp_members. */
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002625#define PAT_OFF(x) offsetof(PatternObject, x)
2626static PyMemberDef pattern_members[] = {
2627 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2628 {"flags", T_INT, PAT_OFF(flags), READONLY},
2629 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2630 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
2631 {NULL} /* Sentinel */
2632};
Guido van Rossumb700df92000-03-31 14:59:30 +00002633
/* Type object for compiled pattern objects.  The two sizes after the name
   are sizeof(PatternObject) and sizeof(SRE_CODE).  Only deallocation,
   flags, the docstring, weak-reference support, methods and members are
   filled in; every other listed slot is 0, and the initializer stops
   after tp_members. */
Neal Norwitz57c179c2006-03-22 07:18:02 +00002634static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002635 PyVarObject_HEAD_INIT(NULL, 0)
2636 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002637 sizeof(PatternObject), sizeof(SRE_CODE),
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002638 (destructor)pattern_dealloc, /* tp_dealloc */
2639 0, /* tp_print */
2640 0, /* tp_getattr */
Raymond Hettinger027bb632004-05-31 03:09:25 +00002641 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00002642 0, /* tp_reserved */
Raymond Hettinger027bb632004-05-31 03:09:25 +00002643 0, /* tp_repr */
2644 0, /* tp_as_number */
2645 0, /* tp_as_sequence */
2646 0, /* tp_as_mapping */
2647 0, /* tp_hash */
2648 0, /* tp_call */
2649 0, /* tp_str */
2650 0, /* tp_getattro */
2651 0, /* tp_setattro */
2652 0, /* tp_as_buffer */
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002653 Py_TPFLAGS_DEFAULT, /* tp_flags */
Raymond Hettinger94478742004-09-24 04:31:19 +00002654 pattern_doc, /* tp_doc */
Raymond Hettinger027bb632004-05-31 03:09:25 +00002655 0, /* tp_traverse */
2656 0, /* tp_clear */
2657 0, /* tp_richcompare */
2658 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002659 0, /* tp_iter */
2660 0, /* tp_iternext */
2661 pattern_methods, /* tp_methods */
2662 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00002663};
2664
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002665static int _validate(PatternObject *self); /* Forward */
2666
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002667static PyObject *
2668_compile(PyObject* self_, PyObject* args)
2669{
2670 /* "compile" pattern descriptor to pattern object */
2671
2672 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002673 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002674
2675 PyObject* pattern;
2676 int flags = 0;
2677 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002678 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002679 PyObject* groupindex = NULL;
2680 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06002681
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002682 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002683 &PyList_Type, &code, &groups,
2684 &groupindex, &indexgroup))
2685 return NULL;
2686
2687 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00002688 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002689 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2690 if (!self)
2691 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002692 self->weakreflist = NULL;
2693 self->pattern = NULL;
2694 self->groupindex = NULL;
2695 self->indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06002696 self->view.buf = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002697
2698 self->codesize = n;
2699
2700 for (i = 0; i < n; i++) {
2701 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00002702 unsigned long value = PyLong_AsUnsignedLong(o);
Antoine Pitrou39bdad82012-11-20 22:30:42 +01002703 if (value == (unsigned long)-1 && PyErr_Occurred()) {
2704 if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
2705 PyErr_SetString(PyExc_OverflowError,
2706 "regular expression code size limit exceeded");
2707 }
2708 break;
2709 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002710 self->code[i] = (SRE_CODE) value;
2711 if ((unsigned long) self->code[i] != value) {
2712 PyErr_SetString(PyExc_OverflowError,
2713 "regular expression code size limit exceeded");
2714 break;
2715 }
2716 }
2717
2718 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002719 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002720 return NULL;
2721 }
2722
Benjamin Petersone48944b2012-03-07 14:50:25 -06002723 if (pattern == Py_None)
2724 self->charsize = -1;
2725 else {
2726 Py_ssize_t p_length;
2727 if (!getstring(pattern, &p_length, &self->charsize, &self->view)) {
2728 Py_DECREF(self);
2729 return NULL;
2730 }
2731 }
Antoine Pitroufd036452008-08-19 17:56:33 +00002732
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002733 Py_INCREF(pattern);
2734 self->pattern = pattern;
2735
2736 self->flags = flags;
2737
2738 self->groups = groups;
2739
2740 Py_XINCREF(groupindex);
2741 self->groupindex = groupindex;
2742
2743 Py_XINCREF(indexgroup);
2744 self->indexgroup = indexgroup;
2745
2746 self->weakreflist = NULL;
2747
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002748 if (!_validate(self)) {
2749 Py_DECREF(self);
2750 return NULL;
2751 }
2752
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002753 return (PyObject*) self;
2754}
2755
Guido van Rossumb700df92000-03-31 14:59:30 +00002756/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002757/* Code validation */
2758
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4
   bytes wide (the latter if Python is compiled for "wide" unicode support).
*/
2781
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator; v is a parenthesized printf argument
   list, e.g. VTRACE(("x=%d\n", x)) */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing validator function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros use the caller's locals `code` and `end` (SRE_CODE
   pointers) and store into the caller's `op`, `arg` or `skip`.  Each one
   FAILs instead of reading at or beyond `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and require that the target code+skip-adj lies in
   [code, end], where code is the address of the skip word itself.  The
   range test is done on integers: forming code+skip for a bogus skip is
   undefined behavior, so a pointer wrap-around test cannot be relied on.
   If skip < adj the unsigned subtraction wraps to a huge value, which is
   rejected as well. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if ((size_t)skip - (size_t)(adj) >              \
            (size_t)(end - code))                       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002822
2823static int
2824_validate_charset(SRE_CODE *code, SRE_CODE *end)
2825{
2826 /* Some variables are manipulated by the macros above */
2827 SRE_CODE op;
2828 SRE_CODE arg;
2829 SRE_CODE offset;
2830 int i;
2831
2832 while (code < end) {
2833 GET_OP;
2834 switch (op) {
2835
2836 case SRE_OP_NEGATE:
2837 break;
2838
2839 case SRE_OP_LITERAL:
2840 GET_ARG;
2841 break;
2842
2843 case SRE_OP_RANGE:
2844 GET_ARG;
2845 GET_ARG;
2846 break;
2847
2848 case SRE_OP_CHARSET:
2849 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
2850 if (code+offset < code || code+offset > end)
2851 FAIL;
2852 code += offset;
2853 break;
2854
2855 case SRE_OP_BIGCHARSET:
2856 GET_ARG; /* Number of blocks */
2857 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
2858 if (code+offset < code || code+offset > end)
2859 FAIL;
2860 /* Make sure that each byte points to a valid block */
2861 for (i = 0; i < 256; i++) {
2862 if (((unsigned char *)code)[i] >= arg)
2863 FAIL;
2864 }
2865 code += offset;
2866 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
2867 if (code+offset < code || code+offset > end)
2868 FAIL;
2869 code += offset;
2870 break;
2871
2872 case SRE_OP_CATEGORY:
2873 GET_ARG;
2874 switch (arg) {
2875 case SRE_CATEGORY_DIGIT:
2876 case SRE_CATEGORY_NOT_DIGIT:
2877 case SRE_CATEGORY_SPACE:
2878 case SRE_CATEGORY_NOT_SPACE:
2879 case SRE_CATEGORY_WORD:
2880 case SRE_CATEGORY_NOT_WORD:
2881 case SRE_CATEGORY_LINEBREAK:
2882 case SRE_CATEGORY_NOT_LINEBREAK:
2883 case SRE_CATEGORY_LOC_WORD:
2884 case SRE_CATEGORY_LOC_NOT_WORD:
2885 case SRE_CATEGORY_UNI_DIGIT:
2886 case SRE_CATEGORY_UNI_NOT_DIGIT:
2887 case SRE_CATEGORY_UNI_SPACE:
2888 case SRE_CATEGORY_UNI_NOT_SPACE:
2889 case SRE_CATEGORY_UNI_WORD:
2890 case SRE_CATEGORY_UNI_NOT_WORD:
2891 case SRE_CATEGORY_UNI_LINEBREAK:
2892 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
2893 break;
2894 default:
2895 FAIL;
2896 }
2897 break;
2898
2899 default:
2900 FAIL;
2901
2902 }
2903 }
2904
2905 return 1;
2906}
2907
2908static int
2909_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2910{
2911 /* Some variables are manipulated by the macros above */
2912 SRE_CODE op;
2913 SRE_CODE arg;
2914 SRE_CODE skip;
2915
2916 VTRACE(("code=%p, end=%p\n", code, end));
2917
2918 if (code > end)
2919 FAIL;
2920
2921 while (code < end) {
2922 GET_OP;
2923 switch (op) {
2924
2925 case SRE_OP_MARK:
2926 /* We don't check whether marks are properly nested; the
2927 sre_match() code is robust even if they don't, and the worst
2928 you can get is nonsensical match results. */
2929 GET_ARG;
2930 if (arg > 2*groups+1) {
2931 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
2932 FAIL;
2933 }
2934 break;
2935
2936 case SRE_OP_LITERAL:
2937 case SRE_OP_NOT_LITERAL:
2938 case SRE_OP_LITERAL_IGNORE:
2939 case SRE_OP_NOT_LITERAL_IGNORE:
2940 GET_ARG;
2941 /* The arg is just a character, nothing to check */
2942 break;
2943
2944 case SRE_OP_SUCCESS:
2945 case SRE_OP_FAILURE:
2946 /* Nothing to check; these normally end the matching process */
2947 break;
2948
2949 case SRE_OP_AT:
2950 GET_ARG;
2951 switch (arg) {
2952 case SRE_AT_BEGINNING:
2953 case SRE_AT_BEGINNING_STRING:
2954 case SRE_AT_BEGINNING_LINE:
2955 case SRE_AT_END:
2956 case SRE_AT_END_LINE:
2957 case SRE_AT_END_STRING:
2958 case SRE_AT_BOUNDARY:
2959 case SRE_AT_NON_BOUNDARY:
2960 case SRE_AT_LOC_BOUNDARY:
2961 case SRE_AT_LOC_NON_BOUNDARY:
2962 case SRE_AT_UNI_BOUNDARY:
2963 case SRE_AT_UNI_NON_BOUNDARY:
2964 break;
2965 default:
2966 FAIL;
2967 }
2968 break;
2969
2970 case SRE_OP_ANY:
2971 case SRE_OP_ANY_ALL:
2972 /* These have no operands */
2973 break;
2974
2975 case SRE_OP_IN:
2976 case SRE_OP_IN_IGNORE:
2977 GET_SKIP;
2978 /* Stop 1 before the end; we check the FAILURE below */
2979 if (!_validate_charset(code, code+skip-2))
2980 FAIL;
2981 if (code[skip-2] != SRE_OP_FAILURE)
2982 FAIL;
2983 code += skip-1;
2984 break;
2985
2986 case SRE_OP_INFO:
2987 {
2988 /* A minimal info field is
2989 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
2990 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
2991 more follows. */
2992 SRE_CODE flags, min, max, i;
2993 SRE_CODE *newcode;
2994 GET_SKIP;
2995 newcode = code+skip-1;
2996 GET_ARG; flags = arg;
2997 GET_ARG; min = arg;
2998 GET_ARG; max = arg;
2999 /* Check that only valid flags are present */
3000 if ((flags & ~(SRE_INFO_PREFIX |
3001 SRE_INFO_LITERAL |
3002 SRE_INFO_CHARSET)) != 0)
3003 FAIL;
3004 /* PREFIX and CHARSET are mutually exclusive */
3005 if ((flags & SRE_INFO_PREFIX) &&
3006 (flags & SRE_INFO_CHARSET))
3007 FAIL;
3008 /* LITERAL implies PREFIX */
3009 if ((flags & SRE_INFO_LITERAL) &&
3010 !(flags & SRE_INFO_PREFIX))
3011 FAIL;
3012 /* Validate the prefix */
3013 if (flags & SRE_INFO_PREFIX) {
3014 SRE_CODE prefix_len, prefix_skip;
3015 GET_ARG; prefix_len = arg;
3016 GET_ARG; prefix_skip = arg;
3017 /* Here comes the prefix string */
3018 if (code+prefix_len < code || code+prefix_len > newcode)
3019 FAIL;
3020 code += prefix_len;
3021 /* And here comes the overlap table */
3022 if (code+prefix_len < code || code+prefix_len > newcode)
3023 FAIL;
3024 /* Each overlap value should be < prefix_len */
3025 for (i = 0; i < prefix_len; i++) {
3026 if (code[i] >= prefix_len)
3027 FAIL;
3028 }
3029 code += prefix_len;
3030 }
3031 /* Validate the charset */
3032 if (flags & SRE_INFO_CHARSET) {
3033 if (!_validate_charset(code, newcode-1))
3034 FAIL;
3035 if (newcode[-1] != SRE_OP_FAILURE)
3036 FAIL;
3037 code = newcode;
3038 }
3039 else if (code != newcode) {
3040 VTRACE(("code=%p, newcode=%p\n", code, newcode));
3041 FAIL;
3042 }
3043 }
3044 break;
3045
3046 case SRE_OP_BRANCH:
3047 {
3048 SRE_CODE *target = NULL;
3049 for (;;) {
3050 GET_SKIP;
3051 if (skip == 0)
3052 break;
3053 /* Stop 2 before the end; we check the JUMP below */
3054 if (!_validate_inner(code, code+skip-3, groups))
3055 FAIL;
3056 code += skip-3;
3057 /* Check that it ends with a JUMP, and that each JUMP
3058 has the same target */
3059 GET_OP;
3060 if (op != SRE_OP_JUMP)
3061 FAIL;
3062 GET_SKIP;
3063 if (target == NULL)
3064 target = code+skip-1;
3065 else if (code+skip-1 != target)
3066 FAIL;
3067 }
3068 }
3069 break;
3070
3071 case SRE_OP_REPEAT_ONE:
3072 case SRE_OP_MIN_REPEAT_ONE:
3073 {
3074 SRE_CODE min, max;
3075 GET_SKIP;
3076 GET_ARG; min = arg;
3077 GET_ARG; max = arg;
3078 if (min > max)
3079 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02003080 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003081 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003082 if (!_validate_inner(code, code+skip-4, groups))
3083 FAIL;
3084 code += skip-4;
3085 GET_OP;
3086 if (op != SRE_OP_SUCCESS)
3087 FAIL;
3088 }
3089 break;
3090
3091 case SRE_OP_REPEAT:
3092 {
3093 SRE_CODE min, max;
3094 GET_SKIP;
3095 GET_ARG; min = arg;
3096 GET_ARG; max = arg;
3097 if (min > max)
3098 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02003099 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003100 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003101 if (!_validate_inner(code, code+skip-3, groups))
3102 FAIL;
3103 code += skip-3;
3104 GET_OP;
3105 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
3106 FAIL;
3107 }
3108 break;
3109
3110 case SRE_OP_GROUPREF:
3111 case SRE_OP_GROUPREF_IGNORE:
3112 GET_ARG;
3113 if (arg >= groups)
3114 FAIL;
3115 break;
3116
3117 case SRE_OP_GROUPREF_EXISTS:
3118 /* The regex syntax for this is: '(?(group)then|else)', where
3119 'group' is either an integer group number or a group name,
3120 'then' and 'else' are sub-regexes, and 'else' is optional. */
3121 GET_ARG;
3122 if (arg >= groups)
3123 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00003124 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003125 code--; /* The skip is relative to the first arg! */
3126 /* There are two possibilities here: if there is both a 'then'
3127 part and an 'else' part, the generated code looks like:
3128
3129 GROUPREF_EXISTS
3130 <group>
3131 <skipyes>
3132 ...then part...
3133 JUMP
3134 <skipno>
3135 (<skipyes> jumps here)
3136 ...else part...
3137 (<skipno> jumps here)
3138
3139 If there is only a 'then' part, it looks like:
3140
3141 GROUPREF_EXISTS
3142 <group>
3143 <skip>
3144 ...then part...
3145 (<skip> jumps here)
3146
3147 There is no direct way to decide which it is, and we don't want
3148 to allow arbitrary jumps anywhere in the code; so we just look
3149 for a JUMP opcode preceding our skip target.
3150 */
3151 if (skip >= 3 && code+skip-3 >= code &&
3152 code[skip-3] == SRE_OP_JUMP)
3153 {
3154 VTRACE(("both then and else parts present\n"));
3155 if (!_validate_inner(code+1, code+skip-3, groups))
3156 FAIL;
3157 code += skip-2; /* Position after JUMP, at <skipno> */
3158 GET_SKIP;
3159 if (!_validate_inner(code, code+skip-1, groups))
3160 FAIL;
3161 code += skip-1;
3162 }
3163 else {
3164 VTRACE(("only a then part present\n"));
3165 if (!_validate_inner(code+1, code+skip-1, groups))
3166 FAIL;
3167 code += skip-1;
3168 }
3169 break;
3170
3171 case SRE_OP_ASSERT:
3172 case SRE_OP_ASSERT_NOT:
3173 GET_SKIP;
3174 GET_ARG; /* 0 for lookahead, width for lookbehind */
3175 code--; /* Back up over arg to simplify math below */
3176 if (arg & 0x80000000)
3177 FAIL; /* Width too large */
3178 /* Stop 1 before the end; we check the SUCCESS below */
3179 if (!_validate_inner(code+1, code+skip-2, groups))
3180 FAIL;
3181 code += skip-2;
3182 GET_OP;
3183 if (op != SRE_OP_SUCCESS)
3184 FAIL;
3185 break;
3186
3187 default:
3188 FAIL;
3189
3190 }
3191 }
3192
3193 VTRACE(("okay\n"));
3194 return 1;
3195}
3196
3197static int
3198_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
3199{
3200 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
3201 FAIL;
3202 if (groups == 0) /* fix for simplejson */
3203 groups = 100; /* 100 groups should always be safe */
3204 return _validate_inner(code, end-1, groups);
3205}
3206
3207static int
3208_validate(PatternObject *self)
3209{
3210 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
3211 {
3212 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
3213 return 0;
3214 }
3215 else
3216 VTRACE(("Success!\n"));
3217 return 1;
3218}
3219
3220/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00003221/* match methods */
3222
3223static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003224match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00003225{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003226 Py_XDECREF(self->regs);
3227 Py_XDECREF(self->string);
3228 Py_DECREF(self->pattern);
3229 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00003230}
3231
3232static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003233match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00003234{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003235 if (index < 0 || index >= self->groups) {
3236 /* raise IndexError if we were given a bad group number */
3237 PyErr_SetString(
3238 PyExc_IndexError,
3239 "no such group"
3240 );
3241 return NULL;
3242 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003243
Fredrik Lundh6f013982000-07-03 18:44:21 +00003244 index *= 2;
3245
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003246 if (self->string == Py_None || self->mark[index] < 0) {
3247 /* return default value if the string or group is undefined */
3248 Py_INCREF(def);
3249 return def;
3250 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003251
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003252 return PySequence_GetSlice(
3253 self->string, self->mark[index], self->mark[index+1]
3254 );
Guido van Rossumb700df92000-03-31 14:59:30 +00003255}
3256
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003257static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003258match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00003259{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003260 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00003261
Guido van Rossumddefaf32007-01-14 03:31:43 +00003262 if (index == NULL)
3263 /* Default value */
3264 return 0;
3265
Christian Heimes217cfd12007-12-02 14:31:20 +00003266 if (PyLong_Check(index))
3267 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00003268
Fredrik Lundh6f013982000-07-03 18:44:21 +00003269 i = -1;
3270
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003271 if (self->pattern->groupindex) {
3272 index = PyObject_GetItem(self->pattern->groupindex, index);
3273 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00003274 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00003275 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00003276 Py_DECREF(index);
3277 } else
3278 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003279 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00003280
3281 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003282}
3283
3284static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00003285match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003286{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003287 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00003288}
3289
3290static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003291match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003292{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003293 /* delegate to Python code */
3294 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00003295 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003296 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003297 );
3298}
3299
3300static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003301match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003302{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003303 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003304 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00003305
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003306 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00003307
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003308 switch (size) {
3309 case 0:
3310 result = match_getslice(self, Py_False, Py_None);
3311 break;
3312 case 1:
3313 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
3314 break;
3315 default:
3316 /* fetch multiple items */
3317 result = PyTuple_New(size);
3318 if (!result)
3319 return NULL;
3320 for (i = 0; i < size; i++) {
3321 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00003322 self, PyTuple_GET_ITEM(args, i), Py_None
3323 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003324 if (!item) {
3325 Py_DECREF(result);
3326 return NULL;
3327 }
3328 PyTuple_SET_ITEM(result, i, item);
3329 }
3330 break;
3331 }
3332 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003333}
3334
3335static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00003336match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00003337{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003338 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003339 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00003340
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003341 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003342 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00003343 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003344 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003345
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003346 result = PyTuple_New(self->groups-1);
3347 if (!result)
3348 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003349
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003350 for (index = 1; index < self->groups; index++) {
3351 PyObject* item;
3352 item = match_getslice_by_index(self, index, def);
3353 if (!item) {
3354 Py_DECREF(result);
3355 return NULL;
3356 }
3357 PyTuple_SET_ITEM(result, index-1, item);
3358 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003359
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003360 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003361}
3362
3363static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00003364match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00003365{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003366 PyObject* result;
3367 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003368 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00003369
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003370 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003371 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00003372 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003373 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003374
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003375 result = PyDict_New();
3376 if (!result || !self->pattern->groupindex)
3377 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003378
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003379 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003380 if (!keys)
3381 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00003382
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003383 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00003384 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003385 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00003386 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003387 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003388 if (!key)
3389 goto failed;
3390 value = match_getslice(self, key, def);
3391 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003392 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003393 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003394 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00003395 status = PyDict_SetItem(result, key, value);
3396 Py_DECREF(value);
3397 if (status < 0)
3398 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003399 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003400
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003401 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00003402
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003403 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00003404
3405failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00003406 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003407 Py_DECREF(result);
3408 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003409}
3410
3411static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003412match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003413{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003414 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003415
Guido van Rossumddefaf32007-01-14 03:31:43 +00003416 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003417 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003418 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003419
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003420 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003421
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003422 if (index < 0 || index >= self->groups) {
3423 PyErr_SetString(
3424 PyExc_IndexError,
3425 "no such group"
3426 );
3427 return NULL;
3428 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003429
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003430 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01003431 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00003432}
3433
3434static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003435match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003436{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003437 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003438
Guido van Rossumddefaf32007-01-14 03:31:43 +00003439 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003440 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003441 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003442
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003443 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003444
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003445 if (index < 0 || index >= self->groups) {
3446 PyErr_SetString(
3447 PyExc_IndexError,
3448 "no such group"
3449 );
3450 return NULL;
3451 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003452
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003453 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01003454 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003455}
3456
3457LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003458_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003459{
3460 PyObject* pair;
3461 PyObject* item;
3462
3463 pair = PyTuple_New(2);
3464 if (!pair)
3465 return NULL;
3466
Christian Heimes217cfd12007-12-02 14:31:20 +00003467 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003468 if (!item)
3469 goto error;
3470 PyTuple_SET_ITEM(pair, 0, item);
3471
Christian Heimes217cfd12007-12-02 14:31:20 +00003472 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003473 if (!item)
3474 goto error;
3475 PyTuple_SET_ITEM(pair, 1, item);
3476
3477 return pair;
3478
3479 error:
3480 Py_DECREF(pair);
3481 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003482}
3483
3484static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003485match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003486{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003487 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003488
Guido van Rossumddefaf32007-01-14 03:31:43 +00003489 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003490 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003491 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003492
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003493 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003494
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003495 if (index < 0 || index >= self->groups) {
3496 PyErr_SetString(
3497 PyExc_IndexError,
3498 "no such group"
3499 );
3500 return NULL;
3501 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003502
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003503 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003504 return _pair(self->mark[index*2], self->mark[index*2+1]);
3505}
3506
3507static PyObject*
3508match_regs(MatchObject* self)
3509{
3510 PyObject* regs;
3511 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003512 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003513
3514 regs = PyTuple_New(self->groups);
3515 if (!regs)
3516 return NULL;
3517
3518 for (index = 0; index < self->groups; index++) {
3519 item = _pair(self->mark[index*2], self->mark[index*2+1]);
3520 if (!item) {
3521 Py_DECREF(regs);
3522 return NULL;
3523 }
3524 PyTuple_SET_ITEM(regs, index, item);
3525 }
3526
3527 Py_INCREF(regs);
3528 self->regs = regs;
3529
3530 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00003531}
3532
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003533static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003534match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003535{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003536#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003537 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003538 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00003539
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003540 slots = 2 * (self->pattern->groups+1);
3541
3542 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3543 if (!copy)
3544 return NULL;
3545
3546 /* this value a constant, but any compiler should be able to
3547 figure that out all by itself */
3548 offset = offsetof(MatchObject, string);
3549
3550 Py_XINCREF(self->pattern);
3551 Py_XINCREF(self->string);
3552 Py_XINCREF(self->regs);
3553
3554 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003555 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003556
3557 return (PyObject*) copy;
3558#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003559 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003560 return NULL;
3561#endif
3562}
3563
3564static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003565match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003566{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003567#ifdef USE_BUILTIN_COPY
3568 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00003569
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003570 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003571 if (!copy)
3572 return NULL;
3573
3574 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3575 !deepcopy(&copy->string, memo) ||
3576 !deepcopy(&copy->regs, memo)) {
3577 Py_DECREF(copy);
3578 return NULL;
3579 }
3580
3581#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003582 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3583 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003584#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003585}
3586
/* Docstrings for the match type (match_doc) and for its group, start, end,
   span, groups, groupdict and expand methods.  Each literal uses
   backslash-newline continuations, so any whitespace at the start of a
   continued line is part of the string itself. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02003587PyDoc_STRVAR(match_doc,
3588"The result of re.match() and re.search().\n\
3589Match objects always have a boolean value of True.");
3590
3591PyDoc_STRVAR(match_group_doc,
3592"group([group1, ...]) -> str or tuple.\n\n\
3593 Return subgroup(s) of the match by indices or names.\n\
3594 For 0 returns the entire match.");
3595
3596PyDoc_STRVAR(match_start_doc,
3597"start([group=0]) -> int.\n\n\
3598 Return index of the start of the substring matched by group.");
3599
3600PyDoc_STRVAR(match_end_doc,
3601"end([group=0]) -> int.\n\n\
3602 Return index of the end of the substring matched by group.");
3603
3604PyDoc_STRVAR(match_span_doc,
3605"span([group]) -> tuple.\n\n\
3606 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
3607
3608PyDoc_STRVAR(match_groups_doc,
3609"groups([default=None]) -> tuple.\n\n\
3610 Return a tuple containing all the subgroups of the match, from 1.\n\
3611 The default argument is used for groups\n\
3612 that did not participate in the match");
3613
3614PyDoc_STRVAR(match_groupdict_doc,
3615"groupdict([default=None]) -> dict.\n\n\
3616 Return a dictionary containing all the named subgroups of the match,\n\
3617 keyed by the subgroup name. The default argument is used for groups\n\
3618 that did not participate in the match");
3619
3620PyDoc_STRVAR(match_expand_doc,
3621"expand(template) -> str.\n\n\
3622 Return the string obtained by doing backslash substitution\n\
3623 on the string template, as done by the sub() method.");
3624
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003625static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02003626 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
3627 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
3628 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
3629 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
3630 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
3631 match_groups_doc},
3632 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
3633 match_groupdict_doc},
3634 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003635 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3636 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003637 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003638};
3639
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003640static PyObject *
3641match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00003642{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003643 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01003644 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003645 Py_INCREF(Py_None);
3646 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00003647}
3648
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003649static PyObject *
3650match_lastgroup_get(MatchObject *self)
3651{
3652 if (self->pattern->indexgroup && self->lastindex >= 0) {
3653 PyObject* result = PySequence_GetItem(
3654 self->pattern->indexgroup, self->lastindex
3655 );
3656 if (result)
3657 return result;
3658 PyErr_Clear();
3659 }
3660 Py_INCREF(Py_None);
3661 return Py_None;
3662}
3663
3664static PyObject *
3665match_regs_get(MatchObject *self)
3666{
3667 if (self->regs) {
3668 Py_INCREF(self->regs);
3669 return self->regs;
3670 } else
3671 return match_regs(self);
3672}
3673
3674static PyGetSetDef match_getset[] = {
3675 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
3676 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
3677 {"regs", (getter)match_regs_get, (setter)NULL},
3678 {NULL}
3679};
3680
3681#define MATCH_OFF(x) offsetof(MatchObject, x)
3682static PyMemberDef match_members[] = {
3683 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
3684 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
3685 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
3686 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
3687 {NULL}
3688};
3689
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
3692
Neal Norwitz57c179c2006-03-22 07:18:02 +00003693static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003694 PyVarObject_HEAD_INIT(NULL,0)
3695 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003696 sizeof(MatchObject), sizeof(Py_ssize_t),
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003697 (destructor)match_dealloc, /* tp_dealloc */
3698 0, /* tp_print */
3699 0, /* tp_getattr */
3700 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00003701 0, /* tp_reserved */
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003702 0, /* tp_repr */
3703 0, /* tp_as_number */
3704 0, /* tp_as_sequence */
3705 0, /* tp_as_mapping */
3706 0, /* tp_hash */
3707 0, /* tp_call */
3708 0, /* tp_str */
3709 0, /* tp_getattro */
3710 0, /* tp_setattro */
3711 0, /* tp_as_buffer */
3712 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02003713 match_doc, /* tp_doc */
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003714 0, /* tp_traverse */
3715 0, /* tp_clear */
3716 0, /* tp_richcompare */
3717 0, /* tp_weaklistoffset */
3718 0, /* tp_iter */
3719 0, /* tp_iternext */
3720 match_methods, /* tp_methods */
3721 match_members, /* tp_members */
3722 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00003723};
3724
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003725static PyObject*
3726pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3727{
3728 /* create match object (from state object) */
3729
3730 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003731 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003732 char* base;
3733 int n;
3734
3735 if (status > 0) {
3736
3737 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00003738 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003739 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3740 2*(pattern->groups+1));
3741 if (!match)
3742 return NULL;
3743
3744 Py_INCREF(pattern);
3745 match->pattern = pattern;
3746
3747 Py_INCREF(state->string);
3748 match->string = state->string;
3749
3750 match->regs = NULL;
3751 match->groups = pattern->groups+1;
3752
3753 /* fill in group slices */
3754
3755 base = (char*) state->beginning;
3756 n = state->charsize;
3757
3758 match->mark[0] = ((char*) state->start - base) / n;
3759 match->mark[1] = ((char*) state->ptr - base) / n;
3760
3761 for (i = j = 0; i < pattern->groups; i++, j+=2)
3762 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3763 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3764 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3765 } else
3766 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3767
3768 match->pos = state->pos;
3769 match->endpos = state->endpos;
3770
3771 match->lastindex = state->lastindex;
3772
3773 return (PyObject*) match;
3774
3775 } else if (status == 0) {
3776
3777 /* no match */
3778 Py_INCREF(Py_None);
3779 return Py_None;
3780
3781 }
3782
3783 /* internal error */
3784 pattern_error(status);
3785 return NULL;
3786}
3787
3788
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003789/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003790/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003791
3792static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003793scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003794{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003795 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00003796 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003797 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003798}
3799
3800static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003801scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003802{
3803 SRE_STATE* state = &self->state;
3804 PyObject* match;
3805 int status;
3806
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003807 state_reset(state);
3808
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003809 state->ptr = state->start;
3810
3811 if (state->charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003812 status = sre_match(state, PatternObject_GetCode(self->pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003813 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003814#if defined(HAVE_UNICODE)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003815 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003816#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003817 }
Thomas Wouters89f507f2006-12-13 04:49:30 +00003818 if (PyErr_Occurred())
3819 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003820
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003821 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003822 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003823
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003824 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003825 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003826 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003827 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003828
3829 return match;
3830}
3831
3832
3833static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003834scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003835{
3836 SRE_STATE* state = &self->state;
3837 PyObject* match;
3838 int status;
3839
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003840 state_reset(state);
3841
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003842 state->ptr = state->start;
3843
3844 if (state->charsize == 1) {
3845 status = sre_search(state, PatternObject_GetCode(self->pattern));
3846 } else {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003847#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003848 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003849#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003850 }
Thomas Wouters89f507f2006-12-13 04:49:30 +00003851 if (PyErr_Occurred())
3852 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003853
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003854 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003855 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003856
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003857 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003858 state->start = (void*) ((char*) state->ptr + state->charsize);
3859 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003860 state->start = state->ptr;
3861
3862 return match;
3863}
3864
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003865static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003866 {"match", (PyCFunction) scanner_match, METH_NOARGS},
3867 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003868 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003869};
3870
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003871#define SCAN_OFF(x) offsetof(ScannerObject, x)
3872static PyMemberDef scanner_members[] = {
3873 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
3874 {NULL} /* Sentinel */
3875};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003876
Neal Norwitz57c179c2006-03-22 07:18:02 +00003877static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003878 PyVarObject_HEAD_INIT(NULL, 0)
3879 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003880 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003881 (destructor)scanner_dealloc,/* tp_dealloc */
3882 0, /* tp_print */
3883 0, /* tp_getattr */
3884 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00003885 0, /* tp_reserved */
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003886 0, /* tp_repr */
3887 0, /* tp_as_number */
3888 0, /* tp_as_sequence */
3889 0, /* tp_as_mapping */
3890 0, /* tp_hash */
3891 0, /* tp_call */
3892 0, /* tp_str */
3893 0, /* tp_getattro */
3894 0, /* tp_setattro */
3895 0, /* tp_as_buffer */
3896 Py_TPFLAGS_DEFAULT, /* tp_flags */
3897 0, /* tp_doc */
3898 0, /* tp_traverse */
3899 0, /* tp_clear */
3900 0, /* tp_richcompare */
3901 0, /* tp_weaklistoffset */
3902 0, /* tp_iter */
3903 0, /* tp_iternext */
3904 scanner_methods, /* tp_methods */
3905 scanner_members, /* tp_members */
3906 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003907};
3908
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003909static PyObject*
3910pattern_scanner(PatternObject* pattern, PyObject* args)
3911{
3912 /* create search state object */
3913
3914 ScannerObject* self;
3915
3916 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003917 Py_ssize_t start = 0;
3918 Py_ssize_t end = PY_SSIZE_T_MAX;
3919 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003920 return NULL;
3921
3922 /* create scanner object */
3923 self = PyObject_NEW(ScannerObject, &Scanner_Type);
3924 if (!self)
3925 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00003926 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003927
3928 string = state_init(&self->state, pattern, string, start, end);
3929 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00003930 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003931 return NULL;
3932 }
3933
3934 Py_INCREF(pattern);
3935 self->pattern = (PyObject*) pattern;
3936
3937 return (PyObject*) self;
3938}
3939
Guido van Rossumb700df92000-03-31 14:59:30 +00003940static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00003941 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003942 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00003943 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003944 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003945};
3946
Martin v. Löwis1a214512008-06-11 05:26:20 +00003947static struct PyModuleDef sremodule = {
3948 PyModuleDef_HEAD_INIT,
3949 "_" SRE_MODULE,
3950 NULL,
3951 -1,
3952 _functions,
3953 NULL,
3954 NULL,
3955 NULL,
3956 NULL
3957};
3958
3959PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00003960{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003961 PyObject* m;
3962 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00003963 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003964
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00003965 /* Patch object types */
3966 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
3967 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00003968 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003969
Martin v. Löwis1a214512008-06-11 05:26:20 +00003970 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00003971 if (m == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003972 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003973 d = PyModule_GetDict(m);
3974
Christian Heimes217cfd12007-12-02 14:31:20 +00003975 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00003976 if (x) {
3977 PyDict_SetItemString(d, "MAGIC", x);
3978 Py_DECREF(x);
3979 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003980
Christian Heimes217cfd12007-12-02 14:31:20 +00003981 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00003982 if (x) {
3983 PyDict_SetItemString(d, "CODESIZE", x);
3984 Py_DECREF(x);
3985 }
3986
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02003987 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
3988 if (x) {
3989 PyDict_SetItemString(d, "MAXREPEAT", x);
3990 Py_DECREF(x);
3991 }
3992
Neal Norwitzfe537132007-08-26 03:55:15 +00003993 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00003994 if (x) {
3995 PyDict_SetItemString(d, "copyright", x);
3996 Py_DECREF(x);
3997 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00003998 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00003999}
4000
Fredrik Lundh436c3d582000-06-29 08:58:44 +00004001#endif /* !defined(SRE_RECURSIVE) */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00004002
4003/* vim:ts=4:sw=4:et
4004*/