blob: 808fb57528a5fa085c95d930f443c5299f00c559 [file] [log] [blame]
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001/*
Guido van Rossumb700df92000-03-31 14:59:30 +00002 * Secret Labs' Regular Expression Engine
Guido van Rossumb700df92000-03-31 14:59:30 +00003 *
Fredrik Lundh6c68dc72000-06-29 10:34:56 +00004 * regular expression matching engine
Guido van Rossumb700df92000-03-31 14:59:30 +00005 *
6 * partial history:
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00007 * 1999-10-24 fl created (based on existing template matcher code)
Fredrik Lundhebc37b22000-10-28 19:30:41 +00008 * 2000-03-06 fl first alpha, sort of
Fredrik Lundhebc37b22000-10-28 19:30:41 +00009 * 2000-08-01 fl fixes for 1.6b1
Fredrik Lundh5644b7f2000-09-21 17:03:25 +000010 * 2000-08-07 fl use PyOS_CheckStack() if available
Fredrik Lundh5644b7f2000-09-21 17:03:25 +000011 * 2000-09-20 fl added expand method
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +000012 * 2001-03-20 fl lots of fixes for 2.1b2
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000013 * 2001-04-15 fl export copyright as Python attribute, not global
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000014 * 2001-04-28 fl added __copy__ methods (work in progress)
Fredrik Lundh09705f02002-11-22 12:46:35 +000015 * 2001-05-14 fl fixes for 1.5.2 compatibility
Fredrik Lundhf71ae462001-07-02 17:04:48 +000016 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
Fredrik Lundh397a6542001-10-18 19:30:16 +000017 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
Fredrik Lundh971e78b2001-10-20 17:48:46 +000018 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
Fredrik Lundhbec95b92001-10-21 16:47:57 +000019 * 2001-10-21 fl added sub/subn primitive
Fredrik Lundh703ce812001-10-24 22:16:30 +000020 * 2001-10-24 fl added finditer primitive (for 2.2 only)
Fredrik Lundh82b23072001-12-09 16:13:15 +000021 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
Fredrik Lundh09705f02002-11-22 12:46:35 +000022 * 2002-11-09 fl fixed empty sub/subn return type
Martin v. Löwis78e2f062003-04-19 12:56:08 +000023 * 2003-04-18 mvl fully support 4-byte codes
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +000024 * 2003-10-17 gn implemented non recursive scheme
Guido van Rossumb700df92000-03-31 14:59:30 +000025 *
Fredrik Lundh770617b2001-01-14 15:06:11 +000026 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
Guido van Rossumb700df92000-03-31 14:59:30 +000027 *
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000028 * This version of the SRE library can be redistributed under CNRI's
29 * Python 1.6 license. For any other use, please contact Secret Labs
30 * AB (info@pythonware.com).
31 *
Guido van Rossumb700df92000-03-31 14:59:30 +000032 * Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000033 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossumb700df92000-03-31 14:59:30 +000034 * other compatibility work.
35 */
36
37#ifndef SRE_RECURSIVE
38
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000039static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000040 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000041
Neal Norwitza6d80fa2006-06-12 03:05:40 +000042#define PY_SSIZE_T_CLEAN
43
Guido van Rossumb700df92000-03-31 14:59:30 +000044#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000045#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000046
47#include "sre.h"
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
Fredrik Lundh436c3d52000-06-29 08:58:44 +000051/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000052#if !defined(SRE_MODULE)
53#define SRE_MODULE "sre"
54#endif
Fredrik Lundh436c3d52000-06-29 08:58:44 +000055
Neal Norwitz94a9c092006-03-16 06:30:02 +000056#define SRE_PY_MODULE "re"
57
Guido van Rossumb700df92000-03-31 14:59:30 +000058/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000059#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000060
Fredrik Lundh971e78b2001-10-20 17:48:46 +000061#if PY_VERSION_HEX >= 0x01060000
62#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
Fredrik Lundh22d25462000-07-01 17:50:59 +000063/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d52000-06-29 08:58:44 +000064#define HAVE_UNICODE
65#endif
Fredrik Lundh971e78b2001-10-20 17:48:46 +000066#endif
Fredrik Lundh436c3d52000-06-29 08:58:44 +000067
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000069/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000070
71/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000072#define USE_FAST_SEARCH
73
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000074/* enables aggressive inlining (always on for Visual C) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000075#undef USE_INLINE
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000076
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000077/* enables copy/deepcopy handling (work in progress) */
78#undef USE_BUILTIN_COPY
79
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000080#if PY_VERSION_HEX < 0x01060000
81#define PyObject_DEL(op) PyMem_DEL((op))
82#endif
83
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000084/* -------------------------------------------------------------------- */
85
Fredrik Lundh80946112000-06-29 18:03:25 +000086#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000087#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000088#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000089/* fastest possible local call under MSVC */
90#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000091#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000092#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000093#else
94#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000095#endif
96
97/* error codes */
98#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000099#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000100#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +0000101#define SRE_ERROR_MEMORY -9 /* out of memory */
Facundo Batista4473d222008-01-08 21:10:12 +0000102#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +0000103
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000104#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +0000105#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +0000106#else
107#define TRACE(v)
108#endif
109
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000110/* -------------------------------------------------------------------- */
111/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +0000112
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000113/* default character predicates (run sre_chars.py to regenerate tables) */
114
115#define SRE_DIGIT_MASK 1
116#define SRE_SPACE_MASK 2
117#define SRE_LINEBREAK_MASK 4
118#define SRE_ALNUM_MASK 8
119#define SRE_WORD_MASK 16
120
Fredrik Lundh21009b92001-09-18 18:47:09 +0000121/* FIXME: this assumes ASCII. create tables in init_sre() instead */
122
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000123static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
1242, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
1250, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
12625, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
12724, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
1280, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
12924, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
130
Fredrik Lundhb389df32000-06-29 12:48:37 +0000131static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
Fredrik Lundh436c3d52000-06-29 08:58:44 +000013210, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
13327, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
13444, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
13561, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
136108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
137122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
138106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
139120, 121, 122, 123, 124, 125, 126, 127 };
140
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000141#define SRE_IS_DIGIT(ch)\
142 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
143#define SRE_IS_SPACE(ch)\
144 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
145#define SRE_IS_LINEBREAK(ch)\
146 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
147#define SRE_IS_ALNUM(ch)\
148 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
149#define SRE_IS_WORD(ch)\
150 ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000151
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000152static unsigned int sre_lower(unsigned int ch)
153{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000154 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000155}
156
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000157/* locale-specific character predicates */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000158/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
159 * warnings when c's type supports only numbers < N+1 */
160#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
161#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000162#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000163#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000164#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
165
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000166static unsigned int sre_lower_locale(unsigned int ch)
167{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000168 return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000169}
170
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000171/* unicode-specific character predicates */
172
173#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000174
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000175#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
176#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
177#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
Fredrik Lundh22d25462000-07-01 17:50:59 +0000178#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000180
181static unsigned int sre_lower_unicode(unsigned int ch)
182{
183 return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
184}
185
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000186#endif
187
Guido van Rossumb700df92000-03-31 14:59:30 +0000188LOCAL(int)
189sre_category(SRE_CODE category, unsigned int ch)
190{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000191 switch (category) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000192
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000193 case SRE_CATEGORY_DIGIT:
194 return SRE_IS_DIGIT(ch);
195 case SRE_CATEGORY_NOT_DIGIT:
196 return !SRE_IS_DIGIT(ch);
197 case SRE_CATEGORY_SPACE:
198 return SRE_IS_SPACE(ch);
199 case SRE_CATEGORY_NOT_SPACE:
200 return !SRE_IS_SPACE(ch);
201 case SRE_CATEGORY_WORD:
202 return SRE_IS_WORD(ch);
203 case SRE_CATEGORY_NOT_WORD:
204 return !SRE_IS_WORD(ch);
205 case SRE_CATEGORY_LINEBREAK:
206 return SRE_IS_LINEBREAK(ch);
207 case SRE_CATEGORY_NOT_LINEBREAK:
208 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000209
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000210 case SRE_CATEGORY_LOC_WORD:
211 return SRE_LOC_IS_WORD(ch);
212 case SRE_CATEGORY_LOC_NOT_WORD:
213 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000214
215#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000216 case SRE_CATEGORY_UNI_DIGIT:
217 return SRE_UNI_IS_DIGIT(ch);
218 case SRE_CATEGORY_UNI_NOT_DIGIT:
219 return !SRE_UNI_IS_DIGIT(ch);
220 case SRE_CATEGORY_UNI_SPACE:
221 return SRE_UNI_IS_SPACE(ch);
222 case SRE_CATEGORY_UNI_NOT_SPACE:
223 return !SRE_UNI_IS_SPACE(ch);
224 case SRE_CATEGORY_UNI_WORD:
225 return SRE_UNI_IS_WORD(ch);
226 case SRE_CATEGORY_UNI_NOT_WORD:
227 return !SRE_UNI_IS_WORD(ch);
228 case SRE_CATEGORY_UNI_LINEBREAK:
229 return SRE_UNI_IS_LINEBREAK(ch);
230 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
231 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh1c5aa692001-01-16 07:37:30 +0000232#else
233 case SRE_CATEGORY_UNI_DIGIT:
234 return SRE_IS_DIGIT(ch);
235 case SRE_CATEGORY_UNI_NOT_DIGIT:
236 return !SRE_IS_DIGIT(ch);
237 case SRE_CATEGORY_UNI_SPACE:
238 return SRE_IS_SPACE(ch);
239 case SRE_CATEGORY_UNI_NOT_SPACE:
240 return !SRE_IS_SPACE(ch);
241 case SRE_CATEGORY_UNI_WORD:
242 return SRE_LOC_IS_WORD(ch);
243 case SRE_CATEGORY_UNI_NOT_WORD:
244 return !SRE_LOC_IS_WORD(ch);
245 case SRE_CATEGORY_UNI_LINEBREAK:
246 return SRE_IS_LINEBREAK(ch);
247 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
248 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000249#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000250 }
251 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000252}
253
254/* helpers */
255
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000256static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000257data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000258{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000259 if (state->data_stack) {
Jack Diederich2d400772006-05-27 15:44:34 +0000260 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000261 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000262 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000263 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000264}
265
266static int
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000267data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000268{
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000269 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000270 minsize = state->data_stack_base+size;
271 cursize = state->data_stack_size;
272 if (cursize < minsize) {
273 void* stack;
274 cursize = minsize+minsize/4+1024;
275 TRACE(("allocate/grow stack %d\n", cursize));
Jack Diederich2d400772006-05-27 15:44:34 +0000276 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000277 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000278 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000279 return SRE_ERROR_MEMORY;
280 }
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000281 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000282 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000283 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000284 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000285}
286
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000287/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000288
289#define SRE_CHAR unsigned char
290#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000291#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000292#define SRE_CHARSET sre_charset
293#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000294#define SRE_MATCH sre_match
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000295#define SRE_MATCH_CONTEXT sre_match_context
Guido van Rossumb700df92000-03-31 14:59:30 +0000296#define SRE_SEARCH sre_search
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000297#define SRE_LITERAL_TEMPLATE sre_literal_template
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000298
299#if defined(HAVE_UNICODE)
300
Guido van Rossumb700df92000-03-31 14:59:30 +0000301#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000302#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000303#undef SRE_RECURSIVE
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000304
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000305#undef SRE_LITERAL_TEMPLATE
Guido van Rossumb700df92000-03-31 14:59:30 +0000306#undef SRE_SEARCH
307#undef SRE_MATCH
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308#undef SRE_MATCH_CONTEXT
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000309#undef SRE_INFO
310#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000311#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000312#undef SRE_AT
313#undef SRE_CHAR
314
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000315/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000316
317#define SRE_CHAR Py_UNICODE
318#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000319#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000320#define SRE_CHARSET sre_ucharset
321#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000322#define SRE_MATCH sre_umatch
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000323#define SRE_MATCH_CONTEXT sre_umatch_context
Guido van Rossumb700df92000-03-31 14:59:30 +0000324#define SRE_SEARCH sre_usearch
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000325#define SRE_LITERAL_TEMPLATE sre_uliteral_template
Fredrik Lundh436c3d52000-06-29 08:58:44 +0000326#endif
Guido van Rossumb700df92000-03-31 14:59:30 +0000327
328#endif /* SRE_RECURSIVE */
329
330/* -------------------------------------------------------------------- */
331/* String matching engine */
332
333/* the following section is compiled twice, with different character
334 settings */
335
336LOCAL(int)
337SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
338{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000339 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000340
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000341 Py_ssize_t thisp, thatp;
Guido van Rossumb700df92000-03-31 14:59:30 +0000342
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000343 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000344
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000345 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000346 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000347 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000348
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000349 case SRE_AT_BEGINNING_LINE:
350 return ((void*) ptr == state->beginning ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000351 SRE_IS_LINEBREAK((int) ptr[-1]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000352
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 case SRE_AT_END:
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000354 return (((void*) (ptr+1) == state->end &&
355 SRE_IS_LINEBREAK((int) ptr[0])) ||
356 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000357
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000358 case SRE_AT_END_LINE:
359 return ((void*) ptr == state->end ||
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000360 SRE_IS_LINEBREAK((int) ptr[0]));
Fredrik Lundh80946112000-06-29 18:03:25 +0000361
Fredrik Lundh770617b2001-01-14 15:06:11 +0000362 case SRE_AT_END_STRING:
363 return ((void*) ptr == state->end);
364
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000365 case SRE_AT_BOUNDARY:
366 if (state->beginning == state->end)
367 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000368 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000369 SRE_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000370 thisp = ((void*) ptr < state->end) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000371 SRE_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000372 return thisp != thatp;
Fredrik Lundh80946112000-06-29 18:03:25 +0000373
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000374 case SRE_AT_NON_BOUNDARY:
375 if (state->beginning == state->end)
376 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000377 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000378 SRE_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000379 thisp = ((void*) ptr < state->end) ?
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000380 SRE_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000381 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000382
383 case SRE_AT_LOC_BOUNDARY:
384 if (state->beginning == state->end)
385 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000386 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000387 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000388 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000389 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000390 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000391
392 case SRE_AT_LOC_NON_BOUNDARY:
393 if (state->beginning == state->end)
394 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000395 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000396 SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000397 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000398 SRE_LOC_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000399 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000400
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000401#if defined(HAVE_UNICODE)
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000402 case SRE_AT_UNI_BOUNDARY:
403 if (state->beginning == state->end)
404 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000405 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000406 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000407 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000408 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000409 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000410
411 case SRE_AT_UNI_NON_BOUNDARY:
412 if (state->beginning == state->end)
413 return 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000414 thatp = ((void*) ptr > state->beginning) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000415 SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000416 thisp = ((void*) ptr < state->end) ?
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000417 SRE_UNI_IS_WORD((int) ptr[0]) : 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000418 return thisp == thatp;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000419#endif
420
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000421 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000422
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000423 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000424}
425
426LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000427SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000428{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000430
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000431 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000432
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000433 for (;;) {
434 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000435
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000436 case SRE_OP_FAILURE:
437 return !ok;
438
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000439 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000440 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000441 if (ch == set[0])
442 return ok;
443 set++;
444 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000445
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000446 case SRE_OP_CATEGORY:
447 /* <CATEGORY> <code> */
448 if (sre_category(set[0], (int) ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000449 return ok;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000450 set += 1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000451 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000452
Fredrik Lundh3562f112000-07-02 12:00:07 +0000453 case SRE_OP_CHARSET:
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000454 if (sizeof(SRE_CODE) == 2) {
455 /* <CHARSET> <bitmap> (16 bits per code word) */
456 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
457 return ok;
458 set += 16;
Tim Peters3d563502006-01-21 02:47:53 +0000459 }
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000460 else {
461 /* <CHARSET> <bitmap> (32 bits per code word) */
462 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
463 return ok;
464 set += 8;
465 }
Fredrik Lundh3562f112000-07-02 12:00:07 +0000466 break;
467
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000468 case SRE_OP_RANGE:
469 /* <RANGE> <lower> <upper> */
470 if (set[0] <= ch && ch <= set[1])
471 return ok;
472 set += 2;
473 break;
474
475 case SRE_OP_NEGATE:
476 ok = !ok;
477 break;
478
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000479 case SRE_OP_BIGCHARSET:
480 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
481 {
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000482 Py_ssize_t count, block;
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000483 count = *(set++);
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000484
485 if (sizeof(SRE_CODE) == 2) {
486 block = ((unsigned char*)set)[ch >> 8];
487 set += 128;
488 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
489 return ok;
490 set += count*16;
491 }
492 else {
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000493 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
494 * warnings when c's type supports only numbers < N+1 */
495 if (!(ch & ~65535))
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000496 block = ((unsigned char*)set)[ch >> 8];
497 else
498 block = -1;
499 set += 64;
Tim Peters3d563502006-01-21 02:47:53 +0000500 if (block >=0 &&
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000501 (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
502 return ok;
503 set += count*8;
504 }
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000505 break;
506 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000507
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000508 default:
509 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000510 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000511 return 0;
512 }
513 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000514}
515
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000516LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000517
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000518LOCAL(Py_ssize_t)
519SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000520{
521 SRE_CODE chr;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000522 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
523 SRE_CHAR* end = (SRE_CHAR *)state->end;
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000524 Py_ssize_t i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000525
526 /* adjust end */
527 if (maxcount < end - ptr && maxcount != 65535)
528 end = ptr + maxcount;
529
530 switch (pattern[0]) {
531
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000532 case SRE_OP_IN:
533 /* repeated set */
534 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
535 while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
536 ptr++;
537 break;
538
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000539 case SRE_OP_ANY:
540 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000541 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000542 while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
543 ptr++;
544 break;
545
546 case SRE_OP_ANY_ALL:
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000547 /* repeated dot wildcard. skip to the end of the target
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000548 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000549 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000550 ptr = end;
551 break;
552
553 case SRE_OP_LITERAL:
554 /* repeated literal */
555 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000556 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000557 while (ptr < end && (SRE_CODE) *ptr == chr)
558 ptr++;
559 break;
560
561 case SRE_OP_LITERAL_IGNORE:
562 /* repeated literal */
563 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000564 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000565 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
566 ptr++;
567 break;
568
569 case SRE_OP_NOT_LITERAL:
570 /* repeated non-literal */
571 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000572 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000573 while (ptr < end && (SRE_CODE) *ptr != chr)
574 ptr++;
575 break;
Tim Peters3d563502006-01-21 02:47:53 +0000576
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000577 case SRE_OP_NOT_LITERAL_IGNORE:
578 /* repeated non-literal */
579 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000580 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000581 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
582 ptr++;
583 break;
584
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000585 default:
586 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000587 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000588 while ((SRE_CHAR*) state->ptr < end) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000589 i = SRE_MATCH(state, pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000590 if (i < 0)
591 return i;
592 if (!i)
593 break;
594 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000595 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
596 (SRE_CHAR*) state->ptr - ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000597 return (SRE_CHAR*) state->ptr - ptr;
598 }
599
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000600 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000601 return ptr - (SRE_CHAR*) state->ptr;
602}
603
Fredrik Lundh33accc12000-08-27 20:59:47 +0000604#if 0 /* not used in this release */
Guido van Rossumb700df92000-03-31 14:59:30 +0000605LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000606SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
607{
608 /* check if an SRE_OP_INFO block matches at the current position.
609 returns the number of SRE_CODE objects to skip if successful, 0
610 if no match */
611
612 SRE_CHAR* end = state->end;
613 SRE_CHAR* ptr = state->ptr;
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000614 Py_ssize_t i;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000615
616 /* check minimal length */
617 if (pattern[3] && (end - ptr) < pattern[3])
618 return 0;
619
620 /* check known prefix */
621 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
622 /* <length> <skip> <prefix data> <overlap data> */
623 for (i = 0; i < pattern[5]; i++)
624 if ((SRE_CODE) ptr[i] != pattern[7 + i])
625 return 0;
626 return pattern[0] + 2 * pattern[6];
627 }
628 return pattern[0];
629}
Fredrik Lundh33accc12000-08-27 20:59:47 +0000630#endif
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000631
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +0000632/* The macros below should be used to protect recursive SRE_MATCH()
633 * calls that *failed* and do *not* return immediately (IOW, those
634 * that will backtrack). Explaining:
635 *
636 * - Recursive SRE_MATCH() returned true: that's usually a success
637 * (besides atypical cases like ASSERT_NOT), therefore there's no
638 * reason to restore lastmark;
639 *
640 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
641 * is returning to the caller: If the current SRE_MATCH() is the
642 * top function of the recursion, returning false will be a matching
643 * failure, and it doesn't matter where lastmark is pointing to.
644 * If it's *not* the top function, it will be a recursive SRE_MATCH()
645 * failure by itself, and the calling SRE_MATCH() will have to deal
646 * with the failure by the same rules explained here (it will restore
647 * lastmark by itself if necessary);
648 *
649 * - Recursive SRE_MATCH() returned false, and will continue the
650 * outside 'for' loop: must be protected when breaking, since the next
651 * OP could potentially depend on lastmark;
Tim Peters3d563502006-01-21 02:47:53 +0000652 *
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +0000653 * - Recursive SRE_MATCH() returned false, and will be called again
654 * inside a local for/while loop: must be protected between each
655 * loop iteration, since the recursive SRE_MATCH() could do anything,
656 * and could potentially depend on lastmark.
657 *
658 * For more information, check the discussion at SF patch #712900.
659 */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000660#define LASTMARK_SAVE() \
661 do { \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000662 ctx->lastmark = state->lastmark; \
663 ctx->lastindex = state->lastindex; \
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000664 } while (0)
665#define LASTMARK_RESTORE() \
666 do { \
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000667 state->lastmark = ctx->lastmark; \
668 state->lastindex = ctx->lastindex; \
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000669 } while (0)
670
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000671#define RETURN_ERROR(i) do { return i; } while(0)
672#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
673#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
674
675#define RETURN_ON_ERROR(i) \
676 do { if (i < 0) RETURN_ERROR(i); } while (0)
677#define RETURN_ON_SUCCESS(i) \
678 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
679#define RETURN_ON_FAILURE(i) \
680 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
681
682#define SFY(x) #x
683
684#define DATA_STACK_ALLOC(state, type, ptr) \
685do { \
686 alloc_pos = state->data_stack_base; \
687 TRACE(("allocating %s in %d (%d)\n", \
688 SFY(type), alloc_pos, sizeof(type))); \
689 if (state->data_stack_size < alloc_pos+sizeof(type)) { \
690 int j = data_stack_grow(state, sizeof(type)); \
691 if (j < 0) return j; \
692 if (ctx_pos != -1) \
693 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
694 } \
695 ptr = (type*)(state->data_stack+alloc_pos); \
696 state->data_stack_base += sizeof(type); \
697} while (0)
698
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000699#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
700do { \
701 TRACE(("looking up %s at %d\n", SFY(type), pos)); \
702 ptr = (type*)(state->data_stack+pos); \
703} while (0)
704
705#define DATA_STACK_PUSH(state, data, size) \
706do { \
707 TRACE(("copy data in %p to %d (%d)\n", \
708 data, state->data_stack_base, size)); \
709 if (state->data_stack_size < state->data_stack_base+size) { \
710 int j = data_stack_grow(state, size); \
711 if (j < 0) return j; \
712 if (ctx_pos != -1) \
713 DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
714 } \
715 memcpy(state->data_stack+state->data_stack_base, data, size); \
716 state->data_stack_base += size; \
717} while (0)
718
719#define DATA_STACK_POP(state, data, size, discard) \
720do { \
721 TRACE(("copy data to %p from %d (%d)\n", \
722 data, state->data_stack_base-size, size)); \
723 memcpy(data, state->data_stack+state->data_stack_base-size, size); \
724 if (discard) \
725 state->data_stack_base -= size; \
726} while (0)
727
728#define DATA_STACK_POP_DISCARD(state, size) \
729do { \
730 TRACE(("discard data from %d (%d)\n", \
731 state->data_stack_base-size, size)); \
732 state->data_stack_base -= size; \
733} while(0)
734
735#define DATA_PUSH(x) \
736 DATA_STACK_PUSH(state, (x), sizeof(*(x)))
737#define DATA_POP(x) \
738 DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000739#define DATA_POP_DISCARD(x) \
740 DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
741#define DATA_ALLOC(t,p) \
742 DATA_STACK_ALLOC(state, t, p)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000743#define DATA_LOOKUP_AT(t,p,pos) \
744 DATA_STACK_LOOKUP_AT(state,t,p,pos)
745
746#define MARK_PUSH(lastmark) \
747 do if (lastmark > 0) { \
748 i = lastmark; /* ctx->lastmark may change if reallocated */ \
749 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
750 } while (0)
751#define MARK_POP(lastmark) \
752 do if (lastmark > 0) { \
753 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
754 } while (0)
755#define MARK_POP_KEEP(lastmark) \
756 do if (lastmark > 0) { \
757 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
758 } while (0)
759#define MARK_POP_DISCARD(lastmark) \
760 do if (lastmark > 0) { \
761 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
762 } while (0)
763
764#define JUMP_NONE 0
765#define JUMP_MAX_UNTIL_1 1
766#define JUMP_MAX_UNTIL_2 2
767#define JUMP_MAX_UNTIL_3 3
768#define JUMP_MIN_UNTIL_1 4
769#define JUMP_MIN_UNTIL_2 5
770#define JUMP_MIN_UNTIL_3 6
771#define JUMP_REPEAT 7
772#define JUMP_REPEAT_ONE_1 8
773#define JUMP_REPEAT_ONE_2 9
774#define JUMP_MIN_REPEAT_ONE 10
775#define JUMP_BRANCH 11
776#define JUMP_ASSERT 12
777#define JUMP_ASSERT_NOT 13
778
779#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
780 DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
781 nextctx->last_ctx_pos = ctx_pos; \
782 nextctx->jump = jumpvalue; \
783 nextctx->pattern = nextpattern; \
784 ctx_pos = alloc_pos; \
785 ctx = nextctx; \
786 goto entrance; \
787 jumplabel: \
788 while (0) /* gcc doesn't like labels at end of scopes */ \
789
790typedef struct {
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000791 Py_ssize_t last_ctx_pos;
792 Py_ssize_t jump;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000793 SRE_CHAR* ptr;
794 SRE_CODE* pattern;
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000795 Py_ssize_t count;
796 Py_ssize_t lastmark;
797 Py_ssize_t lastindex;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000798 union {
799 SRE_CODE chr;
800 SRE_REPEAT* rep;
801 } u;
802} SRE_MATCH_CONTEXT;
803
804/* check if string matches the given pattern. returns <0 for
805 error, 0 for failure, and 1 for success */
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000806LOCAL(Py_ssize_t)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000807SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
Guido van Rossumb700df92000-03-31 14:59:30 +0000808{
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000809 SRE_CHAR* end = (SRE_CHAR *)state->end;
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000810 Py_ssize_t alloc_pos, ctx_pos = -1;
811 Py_ssize_t i, ret = 0;
812 Py_ssize_t jump;
Facundo Batista4473d222008-01-08 21:10:12 +0000813 unsigned int sigcount=0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000814
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000815 SRE_MATCH_CONTEXT* ctx;
816 SRE_MATCH_CONTEXT* nextctx;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000817
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000818 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000819
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000820 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
821 ctx->last_ctx_pos = -1;
822 ctx->jump = JUMP_NONE;
823 ctx->pattern = pattern;
824 ctx_pos = alloc_pos;
825
826entrance:
827
Anthony Baxteraefd8ca2006-04-12 04:26:11 +0000828 ctx->ptr = (SRE_CHAR *)state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000829
830 if (ctx->pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000831 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000832 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000833 if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000834 TRACE(("reject (got %d chars, need %d)\n",
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000835 (end - ctx->ptr), ctx->pattern[3]));
836 RETURN_FAILURE;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000837 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000838 ctx->pattern += ctx->pattern[1] + 1;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000839 }
840
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000841 for (;;) {
Facundo Batista4473d222008-01-08 21:10:12 +0000842 ++sigcount;
843 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
844 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
Guido van Rossumb700df92000-03-31 14:59:30 +0000845
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000846 switch (*ctx->pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000847
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000848 case SRE_OP_MARK:
849 /* set mark */
850 /* <MARK> <gid> */
851 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
852 ctx->ptr, ctx->pattern[0]));
853 i = ctx->pattern[0];
854 if (i & 1)
855 state->lastindex = i/2 + 1;
856 if (i > state->lastmark) {
857 /* state->lastmark is the highest valid index in the
858 state->mark array. If it is increased by more than 1,
859 the intervening marks must be set to NULL to signal
Tim Peters3d563502006-01-21 02:47:53 +0000860 that these marks have not been encountered. */
Neal Norwitza6d80fa2006-06-12 03:05:40 +0000861 Py_ssize_t j = state->lastmark + 1;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000862 while (j < i)
863 state->mark[j++] = NULL;
864 state->lastmark = i;
865 }
866 state->mark[i] = ctx->ptr;
867 ctx->pattern++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000868 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000869
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000870 case SRE_OP_LITERAL:
871 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000872 /* <LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000873 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
874 ctx->ptr, *ctx->pattern));
875 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
876 RETURN_FAILURE;
877 ctx->pattern++;
878 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000879 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000880
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000881 case SRE_OP_NOT_LITERAL:
882 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000883 /* <NOT_LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000884 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
885 ctx->ptr, *ctx->pattern));
886 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
887 RETURN_FAILURE;
888 ctx->pattern++;
889 ctx->ptr++;
890 break;
891
892 case SRE_OP_SUCCESS:
893 /* end of pattern */
894 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
895 state->ptr = ctx->ptr;
896 RETURN_SUCCESS;
897
898 case SRE_OP_AT:
899 /* match at given position */
900 /* <AT> <code> */
901 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
902 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
903 RETURN_FAILURE;
904 ctx->pattern++;
905 break;
906
907 case SRE_OP_CATEGORY:
908 /* match at given category */
909 /* <CATEGORY> <code> */
910 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
911 ctx->ptr, *ctx->pattern));
912 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
913 RETURN_FAILURE;
914 ctx->pattern++;
915 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000916 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000917
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000918 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000919 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000920 /* <ANY> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000921 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
922 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
923 RETURN_FAILURE;
924 ctx->ptr++;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000925 break;
926
927 case SRE_OP_ANY_ALL:
928 /* match anything */
929 /* <ANY_ALL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000930 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
931 if (ctx->ptr >= end)
932 RETURN_FAILURE;
933 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000934 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000935
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000936 case SRE_OP_IN:
937 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000938 /* <IN> <skip> <set> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000939 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
940 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
941 RETURN_FAILURE;
942 ctx->pattern += ctx->pattern[0];
943 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000944 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000945
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000946 case SRE_OP_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000947 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
948 ctx->pattern, ctx->ptr, ctx->pattern[0]));
949 if (ctx->ptr >= end ||
950 state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
951 RETURN_FAILURE;
952 ctx->pattern++;
953 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000954 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000955
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000956 case SRE_OP_NOT_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000957 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
958 ctx->pattern, ctx->ptr, *ctx->pattern));
959 if (ctx->ptr >= end ||
960 state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
961 RETURN_FAILURE;
962 ctx->pattern++;
963 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000964 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000965
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000966 case SRE_OP_IN_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000967 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
968 if (ctx->ptr >= end
969 || !SRE_CHARSET(ctx->pattern+1,
970 (SRE_CODE)state->lower(*ctx->ptr)))
971 RETURN_FAILURE;
972 ctx->pattern += ctx->pattern[0];
973 ctx->ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000974 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000975
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000976 case SRE_OP_JUMP:
977 case SRE_OP_INFO:
978 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000979 /* <JUMP> <offset> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000980 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
981 ctx->ptr, ctx->pattern[0]));
982 ctx->pattern += ctx->pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000983 break;
984
985 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000986 /* alternation */
987 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000988 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000989 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000990 ctx->u.rep = state->repeat;
991 if (ctx->u.rep)
992 MARK_PUSH(ctx->lastmark);
993 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
994 if (ctx->pattern[1] == SRE_OP_LITERAL &&
995 (ctx->ptr >= end ||
996 (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000997 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000998 if (ctx->pattern[1] == SRE_OP_IN &&
999 (ctx->ptr >= end ||
1000 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001001 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001002 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001003 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001004 if (ret) {
1005 if (ctx->u.rep)
1006 MARK_POP_DISCARD(ctx->lastmark);
1007 RETURN_ON_ERROR(ret);
1008 RETURN_SUCCESS;
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +00001009 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001010 if (ctx->u.rep)
1011 MARK_POP_KEEP(ctx->lastmark);
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001012 LASTMARK_RESTORE();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001013 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001014 if (ctx->u.rep)
1015 MARK_POP_DISCARD(ctx->lastmark);
1016 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001017
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001018 case SRE_OP_REPEAT_ONE:
1019 /* match repeated sequence (maximizing regexp) */
1020
1021 /* this operator only works if the repeated item is
1022 exactly one character wide, and we're not already
1023 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +00001024 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001025
1026 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1027
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001028 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1029 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001030
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001031 if (ctx->ptr + ctx->pattern[1] > end)
1032 RETURN_FAILURE; /* cannot match */
Fredrik Lundhe1869832000-08-01 22:47:49 +00001033
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001034 state->ptr = ctx->ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001035
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001036 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1037 RETURN_ON_ERROR(ret);
1038 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1039 ctx->count = ret;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001040 ctx->ptr += ctx->count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001041
1042 /* when we arrive here, count contains the number of
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001043 matches, and ctx->ptr points to the tail of the target
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001044 string. check if the rest of the pattern matches,
1045 and backtrack if not. */
1046
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001047 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001048 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001049
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001050 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001051 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001052 state->ptr = ctx->ptr;
1053 RETURN_SUCCESS;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001054 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001055
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001056 LASTMARK_SAVE();
1057
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001058 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001059 /* tail starts with a literal. skip positions where
1060 the rest of the pattern cannot possibly match */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001061 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001062 for (;;) {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001063 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001064 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1065 ctx->ptr--;
1066 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001067 }
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001068 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001069 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001070 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001071 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1072 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001073 if (ret) {
1074 RETURN_ON_ERROR(ret);
1075 RETURN_SUCCESS;
1076 }
Tim Peters3d563502006-01-21 02:47:53 +00001077
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001078 LASTMARK_RESTORE();
Tim Peters3d563502006-01-21 02:47:53 +00001079
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001080 ctx->ptr--;
1081 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001082 }
1083
1084 } else {
1085 /* general case */
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001086 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001087 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001088 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1089 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001090 if (ret) {
1091 RETURN_ON_ERROR(ret);
1092 RETURN_SUCCESS;
1093 }
1094 ctx->ptr--;
1095 ctx->count--;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001096 LASTMARK_RESTORE();
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001097 }
1098 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001099 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001100
Guido van Rossum41c99e72003-04-14 17:59:34 +00001101 case SRE_OP_MIN_REPEAT_ONE:
1102 /* match repeated sequence (minimizing regexp) */
1103
1104 /* this operator only works if the repeated item is
1105 exactly one character wide, and we're not already
1106 collecting backtracking points. for other cases,
1107 use the MIN_REPEAT operator */
1108
1109 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1110
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001111 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1112 ctx->pattern[1], ctx->pattern[2]));
Guido van Rossum41c99e72003-04-14 17:59:34 +00001113
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001114 if (ctx->ptr + ctx->pattern[1] > end)
1115 RETURN_FAILURE; /* cannot match */
Guido van Rossum41c99e72003-04-14 17:59:34 +00001116
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001117 state->ptr = ctx->ptr;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001118
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001119 if (ctx->pattern[1] == 0)
1120 ctx->count = 0;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001121 else {
1122 /* count using pattern min as the maximum */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001123 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1124 RETURN_ON_ERROR(ret);
1125 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001126 if (ret < (Py_ssize_t) ctx->pattern[1])
Tim Peters3d563502006-01-21 02:47:53 +00001127 /* didn't match minimum number of times */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001128 RETURN_FAILURE;
1129 /* advance past minimum matches of repeat */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001130 ctx->count = ret;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001131 ctx->ptr += ctx->count;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001132 }
1133
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001134 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Guido van Rossum41c99e72003-04-14 17:59:34 +00001135 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001136 state->ptr = ctx->ptr;
1137 RETURN_SUCCESS;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001138
1139 } else {
1140 /* general case */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001141 LASTMARK_SAVE();
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001142 while ((Py_ssize_t)ctx->pattern[2] == 65535
1143 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001144 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001145 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1146 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001147 if (ret) {
1148 RETURN_ON_ERROR(ret);
1149 RETURN_SUCCESS;
1150 }
1151 state->ptr = ctx->ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001152 ret = SRE_COUNT(state, ctx->pattern+3, 1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001153 RETURN_ON_ERROR(ret);
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001154 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001155 if (ret == 0)
Guido van Rossum41c99e72003-04-14 17:59:34 +00001156 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001157 assert(ret == 1);
1158 ctx->ptr++;
1159 ctx->count++;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001160 LASTMARK_RESTORE();
Guido van Rossum41c99e72003-04-14 17:59:34 +00001161 }
Guido van Rossum41c99e72003-04-14 17:59:34 +00001162 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001163 RETURN_FAILURE;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001164
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001165 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001166 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001167 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001168 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001169 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1170 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001171
1172 /* install new repeat context */
Jack Diederich2d400772006-05-27 15:44:34 +00001173 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00001174 if (!ctx->u.rep) {
1175 PyErr_NoMemory();
Neal Norwitzef0de022006-08-12 01:53:28 +00001176 RETURN_FAILURE;
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00001177 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001178 ctx->u.rep->count = -1;
1179 ctx->u.rep->pattern = ctx->pattern;
1180 ctx->u.rep->prev = state->repeat;
1181 ctx->u.rep->last_ptr = NULL;
1182 state->repeat = ctx->u.rep;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001183
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001184 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001185 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001186 state->repeat = ctx->u.rep->prev;
Jack Diederich2d400772006-05-27 15:44:34 +00001187 PyObject_FREE(ctx->u.rep);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001188
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001189 if (ret) {
1190 RETURN_ON_ERROR(ret);
1191 RETURN_SUCCESS;
1192 }
1193 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001194
1195 case SRE_OP_MAX_UNTIL:
1196 /* maximizing repeat */
1197 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1198
1199 /* FIXME: we probably need to deal with zero-width
1200 matches in here... */
1201
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001202 ctx->u.rep = state->repeat;
1203 if (!ctx->u.rep)
1204 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001205
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001206 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001207
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001208 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001209
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001210 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1211 ctx->ptr, ctx->count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001212
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001213 if (ctx->count < ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001214 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001215 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001216 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1217 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001218 if (ret) {
1219 RETURN_ON_ERROR(ret);
1220 RETURN_SUCCESS;
1221 }
1222 ctx->u.rep->count = ctx->count-1;
1223 state->ptr = ctx->ptr;
1224 RETURN_FAILURE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001225 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001226
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001227 if ((ctx->count < ctx->u.rep->pattern[2] ||
1228 ctx->u.rep->pattern[2] == 65535) &&
1229 state->ptr != ctx->u.rep->last_ptr) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001230 /* we may have enough matches, but if we can
1231 match another item, do so */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001232 ctx->u.rep->count = ctx->count;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001233 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001234 MARK_PUSH(ctx->lastmark);
1235 /* zero-width match protection */
1236 DATA_PUSH(&ctx->u.rep->last_ptr);
1237 ctx->u.rep->last_ptr = state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001238 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1239 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001240 DATA_POP(&ctx->u.rep->last_ptr);
1241 if (ret) {
1242 MARK_POP_DISCARD(ctx->lastmark);
1243 RETURN_ON_ERROR(ret);
1244 RETURN_SUCCESS;
1245 }
1246 MARK_POP(ctx->lastmark);
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001247 LASTMARK_RESTORE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001248 ctx->u.rep->count = ctx->count-1;
1249 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001250 }
1251
1252 /* cannot match more repeated items here. make sure the
1253 tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001254 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001255 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001256 RETURN_ON_SUCCESS(ret);
1257 state->repeat = ctx->u.rep;
1258 state->ptr = ctx->ptr;
1259 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001260
1261 case SRE_OP_MIN_UNTIL:
1262 /* minimizing repeat */
1263 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1264
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001265 ctx->u.rep = state->repeat;
1266 if (!ctx->u.rep)
1267 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001268
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001269 state->ptr = ctx->ptr;
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001270
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001271 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001272
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001273 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1274 ctx->ptr, ctx->count, ctx->u.rep->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001275
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001276 if (ctx->count < ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001277 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001278 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001279 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1280 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001281 if (ret) {
1282 RETURN_ON_ERROR(ret);
1283 RETURN_SUCCESS;
1284 }
1285 ctx->u.rep->count = ctx->count-1;
1286 state->ptr = ctx->ptr;
1287 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001288 }
1289
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001290 LASTMARK_SAVE();
1291
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001292 /* see if the tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001293 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001294 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001295 if (ret) {
1296 RETURN_ON_ERROR(ret);
1297 RETURN_SUCCESS;
1298 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001299
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001300 state->repeat = ctx->u.rep;
1301 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001302
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001303 LASTMARK_RESTORE();
1304
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001305 if (ctx->count >= ctx->u.rep->pattern[2]
1306 && ctx->u.rep->pattern[2] != 65535)
1307 RETURN_FAILURE;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001308
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001309 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001310 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1311 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001312 if (ret) {
1313 RETURN_ON_ERROR(ret);
1314 RETURN_SUCCESS;
1315 }
1316 ctx->u.rep->count = ctx->count-1;
1317 state->ptr = ctx->ptr;
1318 RETURN_FAILURE;
1319
1320 case SRE_OP_GROUPREF:
1321 /* match backreference */
1322 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1323 ctx->ptr, ctx->pattern[0]));
1324 i = ctx->pattern[0];
1325 {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001326 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001327 if (groupref >= state->lastmark) {
1328 RETURN_FAILURE;
1329 } else {
1330 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1331 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1332 if (!p || !e || e < p)
1333 RETURN_FAILURE;
1334 while (p < e) {
1335 if (ctx->ptr >= end || *ctx->ptr != *p)
1336 RETURN_FAILURE;
1337 p++; ctx->ptr++;
1338 }
1339 }
1340 }
1341 ctx->pattern++;
1342 break;
1343
1344 case SRE_OP_GROUPREF_IGNORE:
1345 /* match backreference */
1346 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1347 ctx->ptr, ctx->pattern[0]));
1348 i = ctx->pattern[0];
1349 {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001350 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001351 if (groupref >= state->lastmark) {
1352 RETURN_FAILURE;
1353 } else {
1354 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1355 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1356 if (!p || !e || e < p)
1357 RETURN_FAILURE;
1358 while (p < e) {
1359 if (ctx->ptr >= end ||
1360 state->lower(*ctx->ptr) != state->lower(*p))
1361 RETURN_FAILURE;
1362 p++; ctx->ptr++;
1363 }
1364 }
1365 }
1366 ctx->pattern++;
1367 break;
1368
1369 case SRE_OP_GROUPREF_EXISTS:
1370 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1371 ctx->ptr, ctx->pattern[0]));
1372 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1373 i = ctx->pattern[0];
1374 {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001375 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001376 if (groupref >= state->lastmark) {
1377 ctx->pattern += ctx->pattern[1];
1378 break;
1379 } else {
1380 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1381 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1382 if (!p || !e || e < p) {
1383 ctx->pattern += ctx->pattern[1];
1384 break;
1385 }
1386 }
1387 }
1388 ctx->pattern += 2;
1389 break;
1390
1391 case SRE_OP_ASSERT:
1392 /* assert subpattern */
1393 /* <ASSERT> <skip> <back> <pattern> */
1394 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1395 ctx->ptr, ctx->pattern[1]));
1396 state->ptr = ctx->ptr - ctx->pattern[1];
1397 if (state->ptr < state->beginning)
1398 RETURN_FAILURE;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001399 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001400 RETURN_ON_FAILURE(ret);
1401 ctx->pattern += ctx->pattern[0];
1402 break;
1403
1404 case SRE_OP_ASSERT_NOT:
1405 /* assert not subpattern */
1406 /* <ASSERT_NOT> <skip> <back> <pattern> */
1407 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1408 ctx->ptr, ctx->pattern[1]));
1409 state->ptr = ctx->ptr - ctx->pattern[1];
1410 if (state->ptr >= state->beginning) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001411 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001412 if (ret) {
1413 RETURN_ON_ERROR(ret);
1414 RETURN_FAILURE;
1415 }
1416 }
1417 ctx->pattern += ctx->pattern[0];
1418 break;
1419
1420 case SRE_OP_FAILURE:
1421 /* immediate failure */
1422 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1423 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001424
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001425 default:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001426 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1427 ctx->pattern[-1]));
1428 RETURN_ERROR(SRE_ERROR_ILLEGAL);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001429 }
1430 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001431
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001432exit:
1433 ctx_pos = ctx->last_ctx_pos;
1434 jump = ctx->jump;
1435 DATA_POP_DISCARD(ctx);
1436 if (ctx_pos == -1)
1437 return ret;
1438 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1439
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001440 switch (jump) {
1441 case JUMP_MAX_UNTIL_2:
1442 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1443 goto jump_max_until_2;
1444 case JUMP_MAX_UNTIL_3:
1445 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1446 goto jump_max_until_3;
1447 case JUMP_MIN_UNTIL_2:
1448 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1449 goto jump_min_until_2;
1450 case JUMP_MIN_UNTIL_3:
1451 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1452 goto jump_min_until_3;
1453 case JUMP_BRANCH:
1454 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1455 goto jump_branch;
1456 case JUMP_MAX_UNTIL_1:
1457 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1458 goto jump_max_until_1;
1459 case JUMP_MIN_UNTIL_1:
1460 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1461 goto jump_min_until_1;
1462 case JUMP_REPEAT:
1463 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1464 goto jump_repeat;
1465 case JUMP_REPEAT_ONE_1:
1466 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1467 goto jump_repeat_one_1;
1468 case JUMP_REPEAT_ONE_2:
1469 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1470 goto jump_repeat_one_2;
1471 case JUMP_MIN_REPEAT_ONE:
1472 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1473 goto jump_min_repeat_one;
1474 case JUMP_ASSERT:
1475 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1476 goto jump_assert;
1477 case JUMP_ASSERT_NOT:
1478 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1479 goto jump_assert_not;
1480 case JUMP_NONE:
1481 TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
1482 break;
1483 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001484
1485 return ret; /* should never get here */
Guido van Rossumb700df92000-03-31 14:59:30 +00001486}
1487
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001488LOCAL(Py_ssize_t)
Guido van Rossumb700df92000-03-31 14:59:30 +00001489SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1490{
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001491 SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1492 SRE_CHAR* end = (SRE_CHAR *)state->end;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001493 Py_ssize_t status = 0;
1494 Py_ssize_t prefix_len = 0;
1495 Py_ssize_t prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001496 SRE_CODE* prefix = NULL;
1497 SRE_CODE* charset = NULL;
1498 SRE_CODE* overlap = NULL;
1499 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001500
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001501 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001502 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001503 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001504
1505 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001506
Gustavo Niemeyer28b5bb32003-06-26 14:41:08 +00001507 if (pattern[3] > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001508 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001509 character in there, so literal search will work) */
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001510 end -= pattern[3]-1;
1511 if (end <= ptr)
1512 end = ptr+1;
1513 }
1514
Fredrik Lundh3562f112000-07-02 12:00:07 +00001515 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001516 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001517 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001518 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001519 prefix_skip = pattern[6];
1520 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001521 overlap = prefix + prefix_len - 1;
1522 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001523 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001524 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001525 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001526
1527 pattern += 1 + pattern[1];
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001528 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001529
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001530 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1531 TRACE(("charset = %p\n", charset));
1532
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001533#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001534 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001535 /* pattern starts with a known prefix. use the overlap
1536 table to skip forward as fast as we possibly can */
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001537 Py_ssize_t i = 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001538 end = (SRE_CHAR *)state->end;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001539 while (ptr < end) {
1540 for (;;) {
Fredrik Lundh0640e112000-06-30 13:55:15 +00001541 if ((SRE_CODE) ptr[0] != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001542 if (!i)
1543 break;
1544 else
1545 i = overlap[i];
1546 } else {
1547 if (++i == prefix_len) {
1548 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001549 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1550 state->start = ptr + 1 - prefix_len;
1551 state->ptr = ptr + 1 - prefix_len + prefix_skip;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001552 if (flags & SRE_INFO_LITERAL)
1553 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001554 status = SRE_MATCH(state, pattern + 2*prefix_skip);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001555 if (status != 0)
1556 return status;
1557 /* close but no cigar -- try again */
1558 i = overlap[i];
1559 }
1560 break;
1561 }
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001562 }
1563 ptr++;
1564 }
1565 return 0;
1566 }
1567#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001568
Fredrik Lundh3562f112000-07-02 12:00:07 +00001569 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001570 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001571 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001572 SRE_CODE chr = pattern[1];
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001573 end = (SRE_CHAR *)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001574 for (;;) {
1575 while (ptr < end && (SRE_CODE) ptr[0] != chr)
1576 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001577 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001578 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001579 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001580 state->start = ptr;
1581 state->ptr = ++ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001582 if (flags & SRE_INFO_LITERAL)
1583 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001584 status = SRE_MATCH(state, pattern + 2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001585 if (status != 0)
1586 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001587 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001588 } else if (charset) {
1589 /* pattern starts with a character from a known set */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001590 end = (SRE_CHAR *)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001591 for (;;) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001592 while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001593 ptr++;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001594 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001595 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001596 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001597 state->start = ptr;
1598 state->ptr = ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001599 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001600 if (status != 0)
1601 break;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001602 ptr++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001603 }
1604 } else
1605 /* general case */
1606 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001607 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001608 state->start = state->ptr = ptr++;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001609 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001610 if (status != 0)
1611 break;
1612 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001613
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001614 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001615}
Tim Peters3d563502006-01-21 02:47:53 +00001616
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001617LOCAL(int)
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001618SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001619{
1620 /* check if given string is a literal template (i.e. no escapes) */
1621 while (len-- > 0)
1622 if (*ptr++ == '\\')
1623 return 0;
1624 return 1;
1625}
Guido van Rossumb700df92000-03-31 14:59:30 +00001626
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001627#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001628
1629/* -------------------------------------------------------------------- */
1630/* factories and destructors */
1631
1632/* see sre.h for object declarations */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00001633static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1634static PyObject*pattern_scanner(PatternObject*, PyObject*);
Guido van Rossumb700df92000-03-31 14:59:30 +00001635
1636static PyObject *
Georg Brandl964f5972006-05-28 22:38:57 +00001637sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +00001638{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001639 return Py_BuildValue("l", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001640}
1641
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001642static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001643sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001644{
1645 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001646 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001647 return NULL;
1648 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001649 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001650 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001651#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001652 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001653#else
1654 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001655#endif
Fredrik Lundhb389df32000-06-29 12:48:37 +00001656 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001657}
1658
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001659LOCAL(void)
1660state_reset(SRE_STATE* state)
1661{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001662 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001663 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001664
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001665 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001666 state->lastindex = -1;
1667
1668 state->repeat = NULL;
1669
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001670 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001671}
1672
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001673static void*
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001674getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
Guido van Rossumb700df92000-03-31 14:59:30 +00001675{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001676 /* given a python object, return a data pointer, a length (in
1677 characters), and a character size. return NULL if the object
1678 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +00001679
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001680 PyBufferProcs *buffer;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001681 Py_ssize_t size, bytes;
1682 int charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001683 void* ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001684
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001685#if defined(HAVE_UNICODE)
1686 if (PyUnicode_Check(string)) {
1687 /* unicode strings doesn't always support the buffer interface */
1688 ptr = (void*) PyUnicode_AS_DATA(string);
1689 bytes = PyUnicode_GET_DATA_SIZE(string);
1690 size = PyUnicode_GET_SIZE(string);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001691 charsize = sizeof(Py_UNICODE);
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001692
1693 } else {
1694#endif
1695
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001696 /* get pointer to string buffer */
Christian Heimese93237d2007-12-19 02:37:44 +00001697 buffer = Py_TYPE(string)->tp_as_buffer;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001698 if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1699 buffer->bf_getsegcount(string, NULL) != 1) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001700 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001701 return NULL;
1702 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001703
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001704 /* determine buffer size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001705 bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1706 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001707 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1708 return NULL;
1709 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001710
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001711 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001712#if PY_VERSION_HEX >= 0x01060000
1713 size = PyObject_Size(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001714#else
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001715 size = PyObject_Length(string);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001716#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00001717
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001718 if (PyString_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001719 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001720#if defined(HAVE_UNICODE)
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001721 else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001722 charsize = sizeof(Py_UNICODE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001723#endif
1724 else {
1725 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1726 return NULL;
1727 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001728
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001729#if defined(HAVE_UNICODE)
1730 }
1731#endif
1732
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001733 *p_length = size;
1734 *p_charsize = charsize;
1735
1736 return ptr;
1737}
1738
1739LOCAL(PyObject*)
1740state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001741 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001742{
1743 /* prepare state object */
1744
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001745 Py_ssize_t length;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001746 int charsize;
1747 void* ptr;
1748
1749 memset(state, 0, sizeof(SRE_STATE));
1750
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001751 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001752 state->lastindex = -1;
1753
1754 ptr = getstring(string, &length, &charsize);
1755 if (!ptr)
1756 return NULL;
1757
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001758 /* adjust boundaries */
1759 if (start < 0)
1760 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001761 else if (start > length)
1762 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001763
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001764 if (end < 0)
1765 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001766 else if (end > length)
1767 end = length;
1768
1769 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001770
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001771 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001772
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001773 state->start = (void*) ((char*) ptr + start * state->charsize);
1774 state->end = (void*) ((char*) ptr + end * state->charsize);
1775
1776 Py_INCREF(string);
1777 state->string = string;
1778 state->pos = start;
1779 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001780
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001781 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001782 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001783 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001784#if defined(HAVE_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001785 state->lower = sre_lower_unicode;
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00001786#else
1787 state->lower = sre_lower_locale;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001788#endif
1789 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001790 state->lower = sre_lower;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001791
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001792 return string;
Guido van Rossumb700df92000-03-31 14:59:30 +00001793}
1794
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001795LOCAL(void)
1796state_fini(SRE_STATE* state)
1797{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001798 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001799 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001800}
1801
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001802/* calculate offset from start of string */
1803#define STATE_OFFSET(state, member)\
1804 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1805
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001806LOCAL(PyObject*)
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001807state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001808{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001809 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +00001810
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001811 index = (index - 1) * 2;
1812
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001813 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001814 if (empty)
1815 /* want empty string */
1816 i = j = 0;
1817 else {
1818 Py_INCREF(Py_None);
1819 return Py_None;
1820 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001821 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001822 i = STATE_OFFSET(state, state->mark[index]);
1823 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001824 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001825
Fredrik Lundh58100642000-08-09 09:14:35 +00001826 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001827}
1828
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001829static void
1830pattern_error(int status)
1831{
1832 switch (status) {
1833 case SRE_ERROR_RECURSION_LIMIT:
1834 PyErr_SetString(
1835 PyExc_RuntimeError,
1836 "maximum recursion limit exceeded"
1837 );
1838 break;
1839 case SRE_ERROR_MEMORY:
1840 PyErr_NoMemory();
1841 break;
Facundo Batista4473d222008-01-08 21:10:12 +00001842 case SRE_ERROR_INTERRUPTED:
1843 /* An exception has already been raised, so let it fly */
1844 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001845 default:
1846 /* other error codes indicate compiler/engine bugs */
1847 PyErr_SetString(
1848 PyExc_RuntimeError,
1849 "internal error in regular expression engine"
1850 );
1851 }
1852}
1853
Guido van Rossumb700df92000-03-31 14:59:30 +00001854static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001855pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001856{
Raymond Hettinger027bb632004-05-31 03:09:25 +00001857 if (self->weakreflist != NULL)
1858 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001859 Py_XDECREF(self->pattern);
1860 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001861 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001862 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001863}
1864
1865static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001866pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001867{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001868 SRE_STATE state;
1869 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001870
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001871 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001872 Py_ssize_t start = 0;
1873 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001874 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001875 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001876 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001877 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001878
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001879 string = state_init(&state, self, string, start, end);
1880 if (!string)
1881 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001882
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001883 state.ptr = state.start;
1884
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001885 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1886
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001887 if (state.charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001888 status = sre_match(&state, PatternObject_GetCode(self));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001889 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001890#if defined(HAVE_UNICODE)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001891 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001892#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001894
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001895 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00001896 if (PyErr_Occurred())
1897 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001898
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001899 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001900
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001901 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001902}
1903
1904static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001905pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001906{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001907 SRE_STATE state;
1908 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001909
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001910 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001911 Py_ssize_t start = 0;
1912 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001913 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00001914 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001915 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001916 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001917
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001918 string = state_init(&state, self, string, start, end);
1919 if (!string)
1920 return NULL;
1921
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001922 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1923
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001924 if (state.charsize == 1) {
1925 status = sre_search(&state, PatternObject_GetCode(self));
1926 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001927#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001928 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00001929#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001930 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001931
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001932 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001935
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00001936 if (PyErr_Occurred())
1937 return NULL;
1938
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001939 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001940}
1941
1942static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001943call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001944{
1945 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001946 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001947 PyObject* func;
1948 PyObject* result;
1949
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001950 if (!args)
1951 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001952 name = PyString_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001953 if (!name)
1954 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001955 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001956 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001957 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001958 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001959 func = PyObject_GetAttrString(mod, function);
1960 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001961 if (!func)
1962 return NULL;
1963 result = PyObject_CallObject(func, args);
1964 Py_DECREF(func);
1965 Py_DECREF(args);
1966 return result;
1967}
1968
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001969#ifdef USE_BUILTIN_COPY
1970static int
1971deepcopy(PyObject** object, PyObject* memo)
1972{
1973 PyObject* copy;
1974
1975 copy = call(
1976 "copy", "deepcopy",
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001977 PyTuple_Pack(2, *object, memo)
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001978 );
1979 if (!copy)
1980 return 0;
1981
1982 Py_DECREF(*object);
1983 *object = copy;
1984
1985 return 1; /* success */
1986}
1987#endif
1988
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001989static PyObject*
Guido van Rossum1ff91d92007-09-10 22:02:25 +00001990join_list(PyObject* list, PyObject* string)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001991{
1992 /* join list elements */
1993
1994 PyObject* joiner;
1995#if PY_VERSION_HEX >= 0x01060000
1996 PyObject* function;
1997 PyObject* args;
1998#endif
1999 PyObject* result;
2000
Guido van Rossum1ff91d92007-09-10 22:02:25 +00002001 joiner = PySequence_GetSlice(string, 0, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002002 if (!joiner)
2003 return NULL;
2004
Guido van Rossum1ff91d92007-09-10 22:02:25 +00002005 if (PyList_GET_SIZE(list) == 0) {
2006 Py_DECREF(list);
2007 return joiner;
2008 }
2009
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002010#if PY_VERSION_HEX >= 0x01060000
2011 function = PyObject_GetAttrString(joiner, "join");
2012 if (!function) {
2013 Py_DECREF(joiner);
2014 return NULL;
2015 }
2016 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002017 if (!args) {
2018 Py_DECREF(function);
2019 Py_DECREF(joiner);
2020 return NULL;
2021 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002022 PyTuple_SET_ITEM(args, 0, list);
2023 result = PyObject_CallObject(function, args);
2024 Py_DECREF(args); /* also removes list */
2025 Py_DECREF(function);
2026#else
2027 result = call(
2028 "string", "join",
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002029 PyTuple_Pack(2, list, joiner)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002030 );
2031#endif
2032 Py_DECREF(joiner);
2033
2034 return result;
2035}
2036
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002037static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002038pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002039{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002040 SRE_STATE state;
2041 PyObject* list;
2042 int status;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002043 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00002044
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002046 Py_ssize_t start = 0;
2047 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002048 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002049 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00002050 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002051 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002052
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002053 string = state_init(&state, self, string, start, end);
2054 if (!string)
2055 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002056
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002057 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002058 if (!list) {
2059 state_fini(&state);
2060 return NULL;
2061 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002062
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002063 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002064
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002065 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +00002066
Fredrik Lundhebc37b22000-10-28 19:30:41 +00002067 state_reset(&state);
2068
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002069 state.ptr = state.start;
2070
2071 if (state.charsize == 1) {
2072 status = sre_search(&state, PatternObject_GetCode(self));
2073 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002074#if defined(HAVE_UNICODE)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002075 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002076#endif
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002077 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002078
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00002079 if (PyErr_Occurred())
2080 goto error;
2081
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002082 if (status <= 0) {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002083 if (status == 0)
2084 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00002085 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002086 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002087 }
Tim Peters3d563502006-01-21 02:47:53 +00002088
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002089 /* don't bother to build a match object */
2090 switch (self->groups) {
2091 case 0:
2092 b = STATE_OFFSET(&state, state.start);
2093 e = STATE_OFFSET(&state, state.ptr);
2094 item = PySequence_GetSlice(string, b, e);
2095 if (!item)
2096 goto error;
2097 break;
2098 case 1:
2099 item = state_getslice(&state, 1, string, 1);
2100 if (!item)
2101 goto error;
2102 break;
2103 default:
2104 item = PyTuple_New(self->groups);
2105 if (!item)
2106 goto error;
2107 for (i = 0; i < self->groups; i++) {
2108 PyObject* o = state_getslice(&state, i+1, string, 1);
2109 if (!o) {
2110 Py_DECREF(item);
2111 goto error;
2112 }
2113 PyTuple_SET_ITEM(item, i, o);
2114 }
2115 break;
2116 }
2117
2118 status = PyList_Append(list, item);
2119 Py_DECREF(item);
2120 if (status < 0)
2121 goto error;
2122
2123 if (state.ptr == state.start)
2124 state.start = (void*) ((char*) state.ptr + state.charsize);
2125 else
2126 state.start = state.ptr;
2127
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002128 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002129
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002130 state_fini(&state);
2131 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00002132
2133error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002134 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002135 state_fini(&state);
2136 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002137
Guido van Rossumb700df92000-03-31 14:59:30 +00002138}
2139
Fredrik Lundh703ce812001-10-24 22:16:30 +00002140#if PY_VERSION_HEX >= 0x02020000
2141static PyObject*
2142pattern_finditer(PatternObject* pattern, PyObject* args)
2143{
2144 PyObject* scanner;
2145 PyObject* search;
2146 PyObject* iterator;
2147
2148 scanner = pattern_scanner(pattern, args);
2149 if (!scanner)
2150 return NULL;
2151
2152 search = PyObject_GetAttrString(scanner, "search");
2153 Py_DECREF(scanner);
2154 if (!search)
2155 return NULL;
2156
2157 iterator = PyCallIter_New(search, Py_None);
2158 Py_DECREF(search);
2159
2160 return iterator;
2161}
2162#endif
2163
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002164static PyObject*
2165pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2166{
2167 SRE_STATE state;
2168 PyObject* list;
2169 PyObject* item;
2170 int status;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002171 Py_ssize_t n;
2172 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002173 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002174
2175 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002176 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002177 static char* kwlist[] = { "source", "maxsplit", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002178 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002179 &string, &maxsplit))
2180 return NULL;
2181
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002182 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002183 if (!string)
2184 return NULL;
2185
2186 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002187 if (!list) {
2188 state_fini(&state);
2189 return NULL;
2190 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002191
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002192 n = 0;
2193 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002194
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002195 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002196
2197 state_reset(&state);
2198
2199 state.ptr = state.start;
2200
2201 if (state.charsize == 1) {
2202 status = sre_search(&state, PatternObject_GetCode(self));
2203 } else {
2204#if defined(HAVE_UNICODE)
2205 status = sre_usearch(&state, PatternObject_GetCode(self));
2206#endif
2207 }
2208
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00002209 if (PyErr_Occurred())
2210 goto error;
2211
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002212 if (status <= 0) {
2213 if (status == 0)
2214 break;
2215 pattern_error(status);
2216 goto error;
2217 }
Tim Peters3d563502006-01-21 02:47:53 +00002218
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002219 if (state.start == state.ptr) {
2220 if (last == state.end)
2221 break;
2222 /* skip one character */
2223 state.start = (void*) ((char*) state.ptr + state.charsize);
2224 continue;
2225 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002226
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002227 /* get segment before this match */
2228 item = PySequence_GetSlice(
2229 string, STATE_OFFSET(&state, last),
2230 STATE_OFFSET(&state, state.start)
2231 );
2232 if (!item)
2233 goto error;
2234 status = PyList_Append(list, item);
2235 Py_DECREF(item);
2236 if (status < 0)
2237 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002238
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002239 /* add groups (if any) */
2240 for (i = 0; i < self->groups; i++) {
2241 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002242 if (!item)
2243 goto error;
2244 status = PyList_Append(list, item);
2245 Py_DECREF(item);
2246 if (status < 0)
2247 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002248 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002249
2250 n = n + 1;
2251
2252 last = state.start = state.ptr;
2253
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002254 }
2255
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002256 /* get segment following last match (even if empty) */
2257 item = PySequence_GetSlice(
2258 string, STATE_OFFSET(&state, last), state.endpos
2259 );
2260 if (!item)
2261 goto error;
2262 status = PyList_Append(list, item);
2263 Py_DECREF(item);
2264 if (status < 0)
2265 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002266
2267 state_fini(&state);
2268 return list;
2269
2270error:
2271 Py_DECREF(list);
2272 state_fini(&state);
2273 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002274
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002275}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002276
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002277static PyObject*
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002278pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002279 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002280{
2281 SRE_STATE state;
2282 PyObject* list;
2283 PyObject* item;
2284 PyObject* filter;
2285 PyObject* args;
2286 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002287 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002288 int status;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002289 Py_ssize_t n;
2290 Py_ssize_t i, b, e;
2291 int bint;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002292 int filter_is_callable;
2293
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002294 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002295 /* sub/subn takes either a function or a template */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002296 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002297 Py_INCREF(filter);
2298 filter_is_callable = 1;
2299 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002300 /* if not callable, check if it's a literal string */
2301 int literal;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002302 ptr = getstring(ptemplate, &n, &bint);
2303 b = bint;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002304 if (ptr) {
2305 if (b == 1) {
Skip Montanaro816a1622006-04-18 11:53:09 +00002306 literal = sre_literal_template((unsigned char *)ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002307 } else {
2308#if defined(HAVE_UNICODE)
Skip Montanaro816a1622006-04-18 11:53:09 +00002309 literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002310#endif
2311 }
2312 } else {
2313 PyErr_Clear();
2314 literal = 0;
2315 }
2316 if (literal) {
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002317 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002318 Py_INCREF(filter);
2319 filter_is_callable = 0;
2320 } else {
2321 /* not a literal; hand it over to the template compiler */
2322 filter = call(
Neal Norwitz94a9c092006-03-16 06:30:02 +00002323 SRE_PY_MODULE, "_subx",
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002324 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002325 );
2326 if (!filter)
2327 return NULL;
2328 filter_is_callable = PyCallable_Check(filter);
2329 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002330 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002331
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002332 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002333 if (!string) {
2334 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002335 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002336 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002337
2338 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002339 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002340 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002341 state_fini(&state);
2342 return NULL;
2343 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002344
2345 n = i = 0;
2346
2347 while (!count || n < count) {
2348
2349 state_reset(&state);
2350
2351 state.ptr = state.start;
2352
2353 if (state.charsize == 1) {
2354 status = sre_search(&state, PatternObject_GetCode(self));
2355 } else {
2356#if defined(HAVE_UNICODE)
2357 status = sre_usearch(&state, PatternObject_GetCode(self));
2358#endif
2359 }
2360
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00002361 if (PyErr_Occurred())
2362 goto error;
2363
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002364 if (status <= 0) {
2365 if (status == 0)
2366 break;
2367 pattern_error(status);
2368 goto error;
2369 }
Tim Peters3d563502006-01-21 02:47:53 +00002370
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002371 b = STATE_OFFSET(&state, state.start);
2372 e = STATE_OFFSET(&state, state.ptr);
2373
2374 if (i < b) {
2375 /* get segment before this match */
2376 item = PySequence_GetSlice(string, i, b);
2377 if (!item)
2378 goto error;
2379 status = PyList_Append(list, item);
2380 Py_DECREF(item);
2381 if (status < 0)
2382 goto error;
2383
2384 } else if (i == b && i == e && n > 0)
2385 /* ignore empty match on latest position */
2386 goto next;
2387
2388 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002389 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002390 match = pattern_new_match(self, &state, 1);
2391 if (!match)
2392 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002393 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002394 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002395 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002396 goto error;
2397 }
2398 item = PyObject_CallObject(filter, args);
2399 Py_DECREF(args);
2400 Py_DECREF(match);
2401 if (!item)
2402 goto error;
2403 } else {
2404 /* filter is literal string */
2405 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002406 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002407 }
2408
2409 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002410 if (item != Py_None) {
2411 status = PyList_Append(list, item);
2412 Py_DECREF(item);
2413 if (status < 0)
2414 goto error;
2415 }
Tim Peters3d563502006-01-21 02:47:53 +00002416
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002417 i = e;
2418 n = n + 1;
2419
2420next:
2421 /* move on */
2422 if (state.ptr == state.start)
2423 state.start = (void*) ((char*) state.ptr + state.charsize);
2424 else
2425 state.start = state.ptr;
2426
2427 }
2428
2429 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002430 if (i < state.endpos) {
2431 item = PySequence_GetSlice(string, i, state.endpos);
2432 if (!item)
2433 goto error;
2434 status = PyList_Append(list, item);
2435 Py_DECREF(item);
2436 if (status < 0)
2437 goto error;
2438 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002439
2440 state_fini(&state);
2441
Guido van Rossum4e173842001-12-07 04:25:10 +00002442 Py_DECREF(filter);
2443
Fredrik Lundhdac58492001-10-21 21:48:30 +00002444 /* convert list to single string (also removes list) */
Guido van Rossum1ff91d92007-09-10 22:02:25 +00002445 item = join_list(list, string);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002446
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002447 if (!item)
2448 return NULL;
2449
2450 if (subn)
2451 return Py_BuildValue("Ni", item, n);
2452
2453 return item;
2454
2455error:
2456 Py_DECREF(list);
2457 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002458 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002459 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002460
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002461}
2462
2463static PyObject*
2464pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2465{
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002466 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002467 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002468 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002469 static char* kwlist[] = { "repl", "string", "count", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002470 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002471 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002472 return NULL;
2473
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002474 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002475}
2476
2477static PyObject*
2478pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2479{
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002480 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002481 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002482 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002483 static char* kwlist[] = { "repl", "string", "count", NULL };
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002484 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002485 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002486 return NULL;
2487
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002488 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002489}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002490
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002491static PyObject*
Georg Brandl964f5972006-05-28 22:38:57 +00002492pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002493{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002494#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002495 PatternObject* copy;
2496 int offset;
2497
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002498 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2499 if (!copy)
2500 return NULL;
2501
2502 offset = offsetof(PatternObject, groups);
2503
2504 Py_XINCREF(self->groupindex);
2505 Py_XINCREF(self->indexgroup);
2506 Py_XINCREF(self->pattern);
2507
2508 memcpy((char*) copy + offset, (char*) self + offset,
2509 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00002510 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002511
2512 return (PyObject*) copy;
2513#else
2514 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2515 return NULL;
2516#endif
2517}
2518
2519static PyObject*
Georg Brandlfbef5882006-05-28 22:14:04 +00002520pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002521{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002522#ifdef USE_BUILTIN_COPY
2523 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002524
Georg Brandlfbef5882006-05-28 22:14:04 +00002525 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002526 if (!copy)
2527 return NULL;
2528
2529 if (!deepcopy(&copy->groupindex, memo) ||
2530 !deepcopy(&copy->indexgroup, memo) ||
2531 !deepcopy(&copy->pattern, memo)) {
2532 Py_DECREF(copy);
2533 return NULL;
2534 }
2535
2536#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002537 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2538 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002539#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002540}
2541
Raymond Hettinger94478742004-09-24 04:31:19 +00002542PyDoc_STRVAR(pattern_match_doc,
2543"match(string[, pos[, endpos]]) --> match object or None.\n\
2544 Matches zero or more characters at the beginning of the string");
2545
2546PyDoc_STRVAR(pattern_search_doc,
2547"search(string[, pos[, endpos]]) --> match object or None.\n\
2548 Scan through string looking for a match, and return a corresponding\n\
2549 MatchObject instance. Return None if no position in the string matches.");
2550
2551PyDoc_STRVAR(pattern_split_doc,
2552"split(string[, maxsplit = 0]) --> list.\n\
2553 Split string by the occurrences of pattern.");
2554
2555PyDoc_STRVAR(pattern_findall_doc,
2556"findall(string[, pos[, endpos]]) --> list.\n\
2557 Return a list of all non-overlapping matches of pattern in string.");
2558
2559PyDoc_STRVAR(pattern_finditer_doc,
2560"finditer(string[, pos[, endpos]]) --> iterator.\n\
2561 Return an iterator over all non-overlapping matches for the \n\
2562 RE pattern in string. For each match, the iterator returns a\n\
2563 match object.");
2564
2565PyDoc_STRVAR(pattern_sub_doc,
2566"sub(repl, string[, count = 0]) --> newstring\n\
2567 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00002568 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002569
2570PyDoc_STRVAR(pattern_subn_doc,
2571"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2572 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2573 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00002574 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002575
2576PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2577
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002578static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00002579 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002580 pattern_match_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002581 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002582 pattern_search_doc},
2583 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
2584 pattern_sub_doc},
2585 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
2586 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002587 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002588 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002589 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Raymond Hettinger94478742004-09-24 04:31:19 +00002590 pattern_findall_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002591#if PY_VERSION_HEX >= 0x02020000
Raymond Hettinger94478742004-09-24 04:31:19 +00002592 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
2593 pattern_finditer_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002594#endif
Fredrik Lundh562586e2000-10-03 20:43:34 +00002595 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
Georg Brandlfbef5882006-05-28 22:14:04 +00002596 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2597 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002598 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002599};
2600
Tim Peters3d563502006-01-21 02:47:53 +00002601static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002602pattern_getattr(PatternObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00002603{
2604 PyObject* res;
2605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002606 res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
Guido van Rossumb700df92000-03-31 14:59:30 +00002607
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002608 if (res)
2609 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00002610
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002611 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00002612
2613 /* attributes */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002614 if (!strcmp(name, "pattern")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002615 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002616 return self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00002617 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002618
2619 if (!strcmp(name, "flags"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002620 return Py_BuildValue("i", self->flags);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002621
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002622 if (!strcmp(name, "groups"))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002623 return Py_BuildValue("i", self->groups);
Fredrik Lundh01016fe2000-06-30 00:27:46 +00002624
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002625 if (!strcmp(name, "groupindex") && self->groupindex) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002626 Py_INCREF(self->groupindex);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002627 return self->groupindex;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002628 }
2629
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002630 PyErr_SetString(PyExc_AttributeError, name);
2631 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002632}
2633
2634statichere PyTypeObject Pattern_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002635 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00002636 0, "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002637 sizeof(PatternObject), sizeof(SRE_CODE),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002638 (destructor)pattern_dealloc, /*tp_dealloc*/
2639 0, /*tp_print*/
Raymond Hettinger027bb632004-05-31 03:09:25 +00002640 (getattrfunc)pattern_getattr, /*tp_getattr*/
2641 0, /* tp_setattr */
2642 0, /* tp_compare */
2643 0, /* tp_repr */
2644 0, /* tp_as_number */
2645 0, /* tp_as_sequence */
2646 0, /* tp_as_mapping */
2647 0, /* tp_hash */
2648 0, /* tp_call */
2649 0, /* tp_str */
2650 0, /* tp_getattro */
2651 0, /* tp_setattro */
2652 0, /* tp_as_buffer */
2653 Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
Raymond Hettinger94478742004-09-24 04:31:19 +00002654 pattern_doc, /* tp_doc */
Raymond Hettinger027bb632004-05-31 03:09:25 +00002655 0, /* tp_traverse */
2656 0, /* tp_clear */
2657 0, /* tp_richcompare */
2658 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002659};
2660
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002661static PyObject *
2662_compile(PyObject* self_, PyObject* args)
2663{
2664 /* "compile" pattern descriptor to pattern object */
2665
2666 PatternObject* self;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002667 Py_ssize_t i, n;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002668
2669 PyObject* pattern;
2670 int flags = 0;
2671 PyObject* code;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002672 Py_ssize_t groups = 0;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002673 PyObject* groupindex = NULL;
2674 PyObject* indexgroup = NULL;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002675 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002676 &PyList_Type, &code, &groups,
2677 &groupindex, &indexgroup))
2678 return NULL;
2679
2680 n = PyList_GET_SIZE(code);
Christian Heimes4956d2b2008-01-18 19:12:56 +00002681 /* coverity[ampersand_in_size] */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002682 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2683 if (!self)
2684 return NULL;
2685
2686 self->codesize = n;
2687
2688 for (i = 0; i < n; i++) {
2689 PyObject *o = PyList_GET_ITEM(code, i);
2690 unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
2691 : PyLong_AsUnsignedLong(o);
2692 self->code[i] = (SRE_CODE) value;
2693 if ((unsigned long) self->code[i] != value) {
2694 PyErr_SetString(PyExc_OverflowError,
2695 "regular expression code size limit exceeded");
2696 break;
2697 }
2698 }
2699
2700 if (PyErr_Occurred()) {
2701 PyObject_DEL(self);
2702 return NULL;
2703 }
2704
2705 Py_INCREF(pattern);
2706 self->pattern = pattern;
2707
2708 self->flags = flags;
2709
2710 self->groups = groups;
2711
2712 Py_XINCREF(groupindex);
2713 self->groupindex = groupindex;
2714
2715 Py_XINCREF(indexgroup);
2716 self->indexgroup = indexgroup;
2717
2718 self->weakreflist = NULL;
2719
2720 return (PyObject*) self;
2721}
2722
Guido van Rossumb700df92000-03-31 14:59:30 +00002723/* -------------------------------------------------------------------- */
2724/* match methods */
2725
2726static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002727match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002728{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002729 Py_XDECREF(self->regs);
2730 Py_XDECREF(self->string);
2731 Py_DECREF(self->pattern);
2732 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00002733}
2734
2735static PyObject*
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002736match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002737{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002738 if (index < 0 || index >= self->groups) {
2739 /* raise IndexError if we were given a bad group number */
2740 PyErr_SetString(
2741 PyExc_IndexError,
2742 "no such group"
2743 );
2744 return NULL;
2745 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002746
Fredrik Lundh6f013982000-07-03 18:44:21 +00002747 index *= 2;
2748
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002749 if (self->string == Py_None || self->mark[index] < 0) {
2750 /* return default value if the string or group is undefined */
2751 Py_INCREF(def);
2752 return def;
2753 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002755 return PySequence_GetSlice(
2756 self->string, self->mark[index], self->mark[index+1]
2757 );
Guido van Rossumb700df92000-03-31 14:59:30 +00002758}
2759
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002760static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002761match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002762{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002763 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002764
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002765 if (PyInt_Check(index))
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002766 return PyInt_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00002767
Fredrik Lundh6f013982000-07-03 18:44:21 +00002768 i = -1;
2769
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002770 if (self->pattern->groupindex) {
2771 index = PyObject_GetItem(self->pattern->groupindex, index);
2772 if (index) {
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002773 if (PyInt_Check(index) || PyLong_Check(index))
2774 i = PyInt_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002775 Py_DECREF(index);
2776 } else
2777 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002778 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002779
2780 return i;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002781}
2782
2783static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002784match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002785{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002786 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002787}
2788
2789static PyObject*
Georg Brandlfbef5882006-05-28 22:14:04 +00002790match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002791{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002792 /* delegate to Python code */
2793 return call(
Neal Norwitz94a9c092006-03-16 06:30:02 +00002794 SRE_PY_MODULE, "_expand",
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00002795 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002796 );
2797}
2798
2799static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002800match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002801{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002802 PyObject* result;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002803 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002804
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002805 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002806
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002807 switch (size) {
2808 case 0:
2809 result = match_getslice(self, Py_False, Py_None);
2810 break;
2811 case 1:
2812 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2813 break;
2814 default:
2815 /* fetch multiple items */
2816 result = PyTuple_New(size);
2817 if (!result)
2818 return NULL;
2819 for (i = 0; i < size; i++) {
2820 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002821 self, PyTuple_GET_ITEM(args, i), Py_None
2822 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002823 if (!item) {
2824 Py_DECREF(result);
2825 return NULL;
2826 }
2827 PyTuple_SET_ITEM(result, i, item);
2828 }
2829 break;
2830 }
2831 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002832}
2833
2834static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002835match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002836{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002837 PyObject* result;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002838 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002839
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002840 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002841 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002842 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002843 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002844
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002845 result = PyTuple_New(self->groups-1);
2846 if (!result)
2847 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002848
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002849 for (index = 1; index < self->groups; index++) {
2850 PyObject* item;
2851 item = match_getslice_by_index(self, index, def);
2852 if (!item) {
2853 Py_DECREF(result);
2854 return NULL;
2855 }
2856 PyTuple_SET_ITEM(result, index-1, item);
2857 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002858
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002859 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002860}
2861
2862static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002863match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002864{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002865 PyObject* result;
2866 PyObject* keys;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002867 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002868
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002869 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002870 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002871 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002872 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002873
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002874 result = PyDict_New();
2875 if (!result || !self->pattern->groupindex)
2876 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002877
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002878 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002879 if (!keys)
2880 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002881
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002882 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002883 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002884 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002885 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002886 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002887 if (!key)
2888 goto failed;
2889 value = match_getslice(self, key, def);
2890 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002891 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002892 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002893 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002894 status = PyDict_SetItem(result, key, value);
2895 Py_DECREF(value);
2896 if (status < 0)
2897 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002898 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002899
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002900 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002901
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002902 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002903
2904failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002905 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002906 Py_DECREF(result);
2907 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002908}
2909
2910static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002911match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002912{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002913 Py_ssize_t index;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002914
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002915 PyObject* index_ = Py_False; /* zero */
Georg Brandl96a8c392006-05-29 21:04:52 +00002916 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002917 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002918
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002919 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002920
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002921 if (index < 0 || index >= self->groups) {
2922 PyErr_SetString(
2923 PyExc_IndexError,
2924 "no such group"
2925 );
2926 return NULL;
2927 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002928
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002929 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002930 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002931}
2932
2933static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002934match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002935{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002936 Py_ssize_t index;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002937
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002938 PyObject* index_ = Py_False; /* zero */
Georg Brandl96a8c392006-05-29 21:04:52 +00002939 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002940 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002941
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002942 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002943
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002944 if (index < 0 || index >= self->groups) {
2945 PyErr_SetString(
2946 PyExc_IndexError,
2947 "no such group"
2948 );
2949 return NULL;
2950 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002951
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002952 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002953 return Py_BuildValue("i", self->mark[index*2+1]);
2954}
2955
2956LOCAL(PyObject*)
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002957_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002958{
2959 PyObject* pair;
2960 PyObject* item;
2961
2962 pair = PyTuple_New(2);
2963 if (!pair)
2964 return NULL;
2965
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002966 item = PyInt_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002967 if (!item)
2968 goto error;
2969 PyTuple_SET_ITEM(pair, 0, item);
2970
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002971 item = PyInt_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002972 if (!item)
2973 goto error;
2974 PyTuple_SET_ITEM(pair, 1, item);
2975
2976 return pair;
2977
2978 error:
2979 Py_DECREF(pair);
2980 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002981}
2982
2983static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002984match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002985{
Neal Norwitza6d80fa2006-06-12 03:05:40 +00002986 Py_ssize_t index;
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002987
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002988 PyObject* index_ = Py_False; /* zero */
Georg Brandl96a8c392006-05-29 21:04:52 +00002989 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002990 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002991
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002992 index = match_getindex(self, index_);
Fredrik Lundh436c3d52000-06-29 08:58:44 +00002993
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002994 if (index < 0 || index >= self->groups) {
2995 PyErr_SetString(
2996 PyExc_IndexError,
2997 "no such group"
2998 );
2999 return NULL;
3000 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003001
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003002 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003003 return _pair(self->mark[index*2], self->mark[index*2+1]);
3004}
3005
3006static PyObject*
3007match_regs(MatchObject* self)
3008{
3009 PyObject* regs;
3010 PyObject* item;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003011 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003012
3013 regs = PyTuple_New(self->groups);
3014 if (!regs)
3015 return NULL;
3016
3017 for (index = 0; index < self->groups; index++) {
3018 item = _pair(self->mark[index*2], self->mark[index*2+1]);
3019 if (!item) {
3020 Py_DECREF(regs);
3021 return NULL;
3022 }
3023 PyTuple_SET_ITEM(regs, index, item);
3024 }
3025
3026 Py_INCREF(regs);
3027 self->regs = regs;
3028
3029 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00003030}
3031
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003032static PyObject*
Georg Brandl964f5972006-05-28 22:38:57 +00003033match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003034{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003035#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003036 MatchObject* copy;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003037 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00003038
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003039 slots = 2 * (self->pattern->groups+1);
3040
3041 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3042 if (!copy)
3043 return NULL;
3044
3045 /* this value a constant, but any compiler should be able to
3046 figure that out all by itself */
3047 offset = offsetof(MatchObject, string);
3048
3049 Py_XINCREF(self->pattern);
3050 Py_XINCREF(self->string);
3051 Py_XINCREF(self->regs);
3052
3053 memcpy((char*) copy + offset, (char*) self + offset,
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003054 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003055
3056 return (PyObject*) copy;
3057#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003058 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003059 return NULL;
3060#endif
3061}
3062
3063static PyObject*
Georg Brandlfbef5882006-05-28 22:14:04 +00003064match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003065{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003066#ifdef USE_BUILTIN_COPY
3067 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00003068
Georg Brandlfbef5882006-05-28 22:14:04 +00003069 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003070 if (!copy)
3071 return NULL;
3072
3073 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3074 !deepcopy(&copy->string, memo) ||
3075 !deepcopy(&copy->regs, memo)) {
3076 Py_DECREF(copy);
3077 return NULL;
3078 }
3079
3080#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003081 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3082 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003083#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003084}
3085
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003086static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00003087 {"group", (PyCFunction) match_group, METH_VARARGS},
3088 {"start", (PyCFunction) match_start, METH_VARARGS},
3089 {"end", (PyCFunction) match_end, METH_VARARGS},
3090 {"span", (PyCFunction) match_span, METH_VARARGS},
3091 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
3092 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
Georg Brandlfbef5882006-05-28 22:14:04 +00003093 {"expand", (PyCFunction) match_expand, METH_O},
3094 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3095 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003096 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003097};
3098
Tim Peters3d563502006-01-21 02:47:53 +00003099static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003100match_getattr(MatchObject* self, char* name)
Guido van Rossumb700df92000-03-31 14:59:30 +00003101{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003102 PyObject* res;
Guido van Rossumb700df92000-03-31 14:59:30 +00003103
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003104 res = Py_FindMethod(match_methods, (PyObject*) self, name);
3105 if (res)
3106 return res;
Guido van Rossumb700df92000-03-31 14:59:30 +00003107
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003108 PyErr_Clear();
Guido van Rossumb700df92000-03-31 14:59:30 +00003109
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003110 if (!strcmp(name, "lastindex")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00003111 if (self->lastindex >= 0)
3112 return Py_BuildValue("i", self->lastindex);
Fredrik Lundhc2301732000-07-02 22:25:39 +00003113 Py_INCREF(Py_None);
3114 return Py_None;
3115 }
3116
3117 if (!strcmp(name, "lastgroup")) {
Fredrik Lundh6f013982000-07-03 18:44:21 +00003118 if (self->pattern->indexgroup && self->lastindex >= 0) {
Fredrik Lundhc2301732000-07-02 22:25:39 +00003119 PyObject* result = PySequence_GetItem(
Fredrik Lundh6f013982000-07-03 18:44:21 +00003120 self->pattern->indexgroup, self->lastindex
Fredrik Lundhc2301732000-07-02 22:25:39 +00003121 );
3122 if (result)
3123 return result;
3124 PyErr_Clear();
3125 }
3126 Py_INCREF(Py_None);
3127 return Py_None;
3128 }
3129
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003130 if (!strcmp(name, "string")) {
3131 if (self->string) {
3132 Py_INCREF(self->string);
3133 return self->string;
3134 } else {
3135 Py_INCREF(Py_None);
3136 return Py_None;
3137 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003138 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003139
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003140 if (!strcmp(name, "regs")) {
3141 if (self->regs) {
3142 Py_INCREF(self->regs);
3143 return self->regs;
3144 } else
3145 return match_regs(self);
3146 }
3147
3148 if (!strcmp(name, "re")) {
Guido van Rossumb700df92000-03-31 14:59:30 +00003149 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003150 return (PyObject*) self->pattern;
Guido van Rossumb700df92000-03-31 14:59:30 +00003151 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003152
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003153 if (!strcmp(name, "pos"))
3154 return Py_BuildValue("i", self->pos);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003155
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003156 if (!strcmp(name, "endpos"))
3157 return Py_BuildValue("i", self->endpos);
Guido van Rossumb700df92000-03-31 14:59:30 +00003158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003159 PyErr_SetString(PyExc_AttributeError, name);
3160 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003161}
3162
3163/* FIXME: implement setattr("string", None) as a special case (to
3164 detach the associated string, if any */
3165
3166statichere PyTypeObject Match_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003167 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00003168 0, "_" SRE_MODULE ".SRE_Match",
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003169 sizeof(MatchObject), sizeof(Py_ssize_t),
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003170 (destructor)match_dealloc, /*tp_dealloc*/
3171 0, /*tp_print*/
3172 (getattrfunc)match_getattr /*tp_getattr*/
Guido van Rossumb700df92000-03-31 14:59:30 +00003173};
3174
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003175static PyObject*
3176pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3177{
3178 /* create match object (from state object) */
3179
3180 MatchObject* match;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003181 Py_ssize_t i, j;
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003182 char* base;
3183 int n;
3184
3185 if (status > 0) {
3186
3187 /* create match object (with room for extra group marks) */
Christian Heimes4956d2b2008-01-18 19:12:56 +00003188 /* coverity[ampersand_in_size] */
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003189 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3190 2*(pattern->groups+1));
3191 if (!match)
3192 return NULL;
3193
3194 Py_INCREF(pattern);
3195 match->pattern = pattern;
3196
3197 Py_INCREF(state->string);
3198 match->string = state->string;
3199
3200 match->regs = NULL;
3201 match->groups = pattern->groups+1;
3202
3203 /* fill in group slices */
3204
3205 base = (char*) state->beginning;
3206 n = state->charsize;
3207
3208 match->mark[0] = ((char*) state->start - base) / n;
3209 match->mark[1] = ((char*) state->ptr - base) / n;
3210
3211 for (i = j = 0; i < pattern->groups; i++, j+=2)
3212 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3213 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3214 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3215 } else
3216 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3217
3218 match->pos = state->pos;
3219 match->endpos = state->endpos;
3220
3221 match->lastindex = state->lastindex;
3222
3223 return (PyObject*) match;
3224
3225 } else if (status == 0) {
3226
3227 /* no match */
3228 Py_INCREF(Py_None);
3229 return Py_None;
3230
3231 }
3232
3233 /* internal error */
3234 pattern_error(status);
3235 return NULL;
3236}
3237
3238
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003239/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003240/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003241
3242static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003243scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003244{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003245 state_fini(&self->state);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003246 Py_DECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003247 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003248}
3249
3250static PyObject*
Georg Brandl964f5972006-05-28 22:38:57 +00003251scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003252{
3253 SRE_STATE* state = &self->state;
3254 PyObject* match;
3255 int status;
3256
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003257 state_reset(state);
3258
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003259 state->ptr = state->start;
3260
3261 if (state->charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003262 status = sre_match(state, PatternObject_GetCode(self->pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003263 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003264#if defined(HAVE_UNICODE)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003265 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003266#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003267 }
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00003268 if (PyErr_Occurred())
3269 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003270
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003271 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003272 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003273
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003274 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003275 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003276 else
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003277 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003278
3279 return match;
3280}
3281
3282
3283static PyObject*
Georg Brandl964f5972006-05-28 22:38:57 +00003284scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003285{
3286 SRE_STATE* state = &self->state;
3287 PyObject* match;
3288 int status;
3289
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003290 state_reset(state);
3291
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003292 state->ptr = state->start;
3293
3294 if (state->charsize == 1) {
3295 status = sre_search(state, PatternObject_GetCode(self->pattern));
3296 } else {
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003297#if defined(HAVE_UNICODE)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003298 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003299#endif
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003300 }
Andrew M. Kuchling36126c42006-10-04 13:42:43 +00003301 if (PyErr_Occurred())
3302 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003303
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003304 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003305 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003306
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003307 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003308 state->start = (void*) ((char*) state->ptr + state->charsize);
3309 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003310 state->start = state->ptr;
3311
3312 return match;
3313}
3314
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003315static PyMethodDef scanner_methods[] = {
Georg Brandlfbef5882006-05-28 22:14:04 +00003316 {"match", (PyCFunction) scanner_match, METH_NOARGS},
3317 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003318 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003319};
3320
Tim Peters3d563502006-01-21 02:47:53 +00003321static PyObject*
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003322scanner_getattr(ScannerObject* self, char* name)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003323{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003324 PyObject* res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003325
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003326 res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
3327 if (res)
3328 return res;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003329
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003330 PyErr_Clear();
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003331
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003332 /* attributes */
3333 if (!strcmp(name, "pattern")) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003334 Py_INCREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003335 return self->pattern;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003336 }
3337
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003338 PyErr_SetString(PyExc_AttributeError, name);
3339 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003340}
3341
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003342statichere PyTypeObject Scanner_Type = {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003343 PyObject_HEAD_INIT(NULL)
Fredrik Lundh82b23072001-12-09 16:13:15 +00003344 0, "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003345 sizeof(ScannerObject), 0,
3346 (destructor)scanner_dealloc, /*tp_dealloc*/
3347 0, /*tp_print*/
3348 (getattrfunc)scanner_getattr, /*tp_getattr*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003349};
3350
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003351static PyObject*
3352pattern_scanner(PatternObject* pattern, PyObject* args)
3353{
3354 /* create search state object */
3355
3356 ScannerObject* self;
3357
3358 PyObject* string;
Neal Norwitza6d80fa2006-06-12 03:05:40 +00003359 Py_ssize_t start = 0;
3360 Py_ssize_t end = PY_SSIZE_T_MAX;
3361 if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
Anthony Baxteraefd8ca2006-04-12 04:26:11 +00003362 return NULL;
3363
3364 /* create scanner object */
3365 self = PyObject_NEW(ScannerObject, &Scanner_Type);
3366 if (!self)
3367 return NULL;
3368
3369 string = state_init(&self->state, pattern, string, start, end);
3370 if (!string) {
3371 PyObject_DEL(self);
3372 return NULL;
3373 }
3374
3375 Py_INCREF(pattern);
3376 self->pattern = (PyObject*) pattern;
3377
3378 return (PyObject*) self;
3379}
3380
Guido van Rossumb700df92000-03-31 14:59:30 +00003381static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00003382 {"compile", _compile, METH_VARARGS},
Georg Brandlfbef5882006-05-28 22:14:04 +00003383 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00003384 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003385 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003386};
3387
Tim Peters3d563502006-01-21 02:47:53 +00003388#if PY_VERSION_HEX < 0x02030000
Andrew M. Kuchlingc24fe362003-04-30 13:09:08 +00003389DL_EXPORT(void) init_sre(void)
3390#else
Mark Hammond8235ea12002-07-19 06:55:41 +00003391PyMODINIT_FUNC init_sre(void)
Andrew M. Kuchlingc24fe362003-04-30 13:09:08 +00003392#endif
Guido van Rossumb700df92000-03-31 14:59:30 +00003393{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003394 PyObject* m;
3395 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00003396 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003397
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003398 /* Patch object types */
3399 Pattern_Type.ob_type = Match_Type.ob_type =
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003400 Scanner_Type.ob_type = &PyType_Type;
Guido van Rossumb700df92000-03-31 14:59:30 +00003401
Fredrik Lundh1c5aa692001-01-16 07:37:30 +00003402 m = Py_InitModule("_" SRE_MODULE, _functions);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00003403 if (m == NULL)
3404 return;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003405 d = PyModule_GetDict(m);
3406
Fredrik Lundh21009b92001-09-18 18:47:09 +00003407 x = PyInt_FromLong(SRE_MAGIC);
3408 if (x) {
3409 PyDict_SetItemString(d, "MAGIC", x);
3410 Py_DECREF(x);
3411 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003412
Martin v. Löwis78e2f062003-04-19 12:56:08 +00003413 x = PyInt_FromLong(sizeof(SRE_CODE));
3414 if (x) {
3415 PyDict_SetItemString(d, "CODESIZE", x);
3416 Py_DECREF(x);
3417 }
3418
Fredrik Lundh21009b92001-09-18 18:47:09 +00003419 x = PyString_FromString(copyright);
3420 if (x) {
3421 PyDict_SetItemString(d, "copyright", x);
3422 Py_DECREF(x);
3423 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003424}
3425
Fredrik Lundh436c3d52000-06-29 08:58:44 +00003426#endif /* !defined(SRE_RECURSIVE) */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00003427
3428/* vim:ts=4:sw=4:et
3429*/