/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */

37#ifndef SRE_RECURSIVE
38
Fredrik Lundh9c7eab82001-04-15 19:00:58 +000039static char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000040 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000041
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042#define PY_SSIZE_T_CLEAN
43
Guido van Rossumb700df92000-03-31 14:59:30 +000044#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000045#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000046
47#include "sre.h"
48
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000049#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000050
Fredrik Lundh436c3d582000-06-29 08:58:44 +000051/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000052#if !defined(SRE_MODULE)
53#define SRE_MODULE "sre"
54#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000055
Thomas Wouters9ada3d62006-04-21 09:47:09 +000056#define SRE_PY_MODULE "re"
57
Guido van Rossumb700df92000-03-31 14:59:30 +000058/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000059#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000060
Fredrik Lundh22d25462000-07-01 17:50:59 +000061/* defining this enables unicode support (default under 1.6a1 and later) */
Fredrik Lundh436c3d582000-06-29 08:58:44 +000062#define HAVE_UNICODE
Fredrik Lundh436c3d582000-06-29 08:58:44 +000063
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000065/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000066
67/* enables fast searching */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000068#define USE_FAST_SEARCH
69
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000070/* enables copy/deepcopy handling (work in progress) */
71#undef USE_BUILTIN_COPY
72
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000073#if PY_VERSION_HEX < 0x01060000
74#define PyObject_DEL(op) PyMem_DEL((op))
75#endif
76
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000077/* -------------------------------------------------------------------- */
78
Fredrik Lundh80946112000-06-29 18:03:25 +000079#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000080#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000081#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000082/* fastest possible local call under MSVC */
83#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000084#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000085#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000086#else
87#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000088#endif
89
90/* error codes */
91#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000092#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000093#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000094#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000095#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000096
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000097#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000098#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000099#else
100#define TRACE(v)
101#endif
102
/* -------------------------------------------------------------------- */
/* search engine state */

/* default character predicates (run sre_chars.py to regenerate tables) */

#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16

/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* Per-character bit set of the SRE_*_MASK flags above, indexed by ASCII
   code point; one row per 16 code points. */
static char sre_char_info[128] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /* 0x60 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};

/* ASCII lower-case mapping: identity except that 'A'..'Z' (65..90) map
   to 'a'..'z' (97..122). */
static char sre_char_lower[128] = {
    /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /* 0x40 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
               93, 94, 95,
    /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
               109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
               124, 125, 126, 127
};

/* Each predicate yields a nonzero mask bit when ch (< 128) has the
   property and 0 otherwise; anything >= 128 is never a member.  The
   argument is evaluated more than once and must not be negative. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)

/* Lower-case ch using the ASCII table; values >= 128 are returned as is. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
}

/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case ch with the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}

/* unicode-specific character predicates */

/* Thin wrappers over the Py_UNICODE_* classification macros; "word"
   means alphanumeric or underscore. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')

/* Lower-case ch via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}

Guido van Rossumb700df92000-03-31 14:59:30 +0000177LOCAL(int)
178sre_category(SRE_CODE category, unsigned int ch)
179{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000180 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000181
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000182 case SRE_CATEGORY_DIGIT:
183 return SRE_IS_DIGIT(ch);
184 case SRE_CATEGORY_NOT_DIGIT:
185 return !SRE_IS_DIGIT(ch);
186 case SRE_CATEGORY_SPACE:
187 return SRE_IS_SPACE(ch);
188 case SRE_CATEGORY_NOT_SPACE:
189 return !SRE_IS_SPACE(ch);
190 case SRE_CATEGORY_WORD:
191 return SRE_IS_WORD(ch);
192 case SRE_CATEGORY_NOT_WORD:
193 return !SRE_IS_WORD(ch);
194 case SRE_CATEGORY_LINEBREAK:
195 return SRE_IS_LINEBREAK(ch);
196 case SRE_CATEGORY_NOT_LINEBREAK:
197 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000198
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000199 case SRE_CATEGORY_LOC_WORD:
200 return SRE_LOC_IS_WORD(ch);
201 case SRE_CATEGORY_LOC_NOT_WORD:
202 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000203
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000204 case SRE_CATEGORY_UNI_DIGIT:
205 return SRE_UNI_IS_DIGIT(ch);
206 case SRE_CATEGORY_UNI_NOT_DIGIT:
207 return !SRE_UNI_IS_DIGIT(ch);
208 case SRE_CATEGORY_UNI_SPACE:
209 return SRE_UNI_IS_SPACE(ch);
210 case SRE_CATEGORY_UNI_NOT_SPACE:
211 return !SRE_UNI_IS_SPACE(ch);
212 case SRE_CATEGORY_UNI_WORD:
213 return SRE_UNI_IS_WORD(ch);
214 case SRE_CATEGORY_UNI_NOT_WORD:
215 return !SRE_UNI_IS_WORD(ch);
216 case SRE_CATEGORY_UNI_LINEBREAK:
217 return SRE_UNI_IS_LINEBREAK(ch);
218 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
219 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000220 }
221 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000222}
223
224/* helpers */
225
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000226static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000227data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000228{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000229 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000230 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000231 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000232 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000233 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000234}
235
236static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000237data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000238{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000239 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000240 minsize = state->data_stack_base+size;
241 cursize = state->data_stack_size;
242 if (cursize < minsize) {
243 void* stack;
244 cursize = minsize+minsize/4+1024;
245 TRACE(("allocate/grow stack %d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000246 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000247 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000248 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000249 return SRE_ERROR_MEMORY;
250 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000251 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000252 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000253 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000254 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000255}
256
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000257/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000258
259#define SRE_CHAR unsigned char
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200260#define SRE_CHARGET(state, buf, index) ((unsigned char*)buf)[index]
Guido van Rossumb700df92000-03-31 14:59:30 +0000261#define SRE_AT sre_at
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000262#define SRE_COUNT sre_count
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000263#define SRE_CHARSET sre_charset
264#define SRE_INFO sre_info
Guido van Rossumb700df92000-03-31 14:59:30 +0000265#define SRE_MATCH sre_match
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000266#define SRE_MATCH_CONTEXT sre_match_context
Guido van Rossumb700df92000-03-31 14:59:30 +0000267#define SRE_SEARCH sre_search
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000268
Guido van Rossumb700df92000-03-31 14:59:30 +0000269#define SRE_RECURSIVE
Guido van Rossumb700df92000-03-31 14:59:30 +0000270#include "_sre.c"
Guido van Rossumb700df92000-03-31 14:59:30 +0000271#undef SRE_RECURSIVE
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000272
Guido van Rossumb700df92000-03-31 14:59:30 +0000273#undef SRE_SEARCH
274#undef SRE_MATCH
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000275#undef SRE_MATCH_CONTEXT
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000276#undef SRE_INFO
277#undef SRE_CHARSET
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000278#undef SRE_COUNT
Guido van Rossumb700df92000-03-31 14:59:30 +0000279#undef SRE_AT
280#undef SRE_CHAR
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200281#undef SRE_CHARGET
Guido van Rossumb700df92000-03-31 14:59:30 +0000282
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200283/* generate 8/16/32-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000284
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200285#define SRE_CHAR void
286#define SRE_CHARGET(state, buf, index) \
287 ((state->charsize==1) ? ((Py_UCS1*)buf)[index] : \
288 (state->charsize==2) ? ((Py_UCS2*)buf)[index] : \
289 ((Py_UCS4*)buf)[index])
Guido van Rossumb700df92000-03-31 14:59:30 +0000290#define SRE_AT sre_uat
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000291#define SRE_COUNT sre_ucount
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000292#define SRE_CHARSET sre_ucharset
293#define SRE_INFO sre_uinfo
Guido van Rossumb700df92000-03-31 14:59:30 +0000294#define SRE_MATCH sre_umatch
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000295#define SRE_MATCH_CONTEXT sre_umatch_context
Guido van Rossumb700df92000-03-31 14:59:30 +0000296#define SRE_SEARCH sre_usearch
297
298#endif /* SRE_RECURSIVE */
299
300/* -------------------------------------------------------------------- */
301/* String matching engine */
302
303/* the following section is compiled twice, with different character
304 settings */
305
306LOCAL(int)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200307SRE_AT(SRE_STATE* state, char* ptr, SRE_CODE at)
Guido van Rossumb700df92000-03-31 14:59:30 +0000308{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000309 /* check if pointer is at given position */
Guido van Rossumb700df92000-03-31 14:59:30 +0000310
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000311 Py_ssize_t thisp, thatp;
Guido van Rossumb700df92000-03-31 14:59:30 +0000312
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000313 switch (at) {
Fredrik Lundh80946112000-06-29 18:03:25 +0000314
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000315 case SRE_AT_BEGINNING:
Fredrik Lundh770617b2001-01-14 15:06:11 +0000316 case SRE_AT_BEGINNING_STRING:
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000317 return ((void*) ptr == state->beginning);
Fredrik Lundh80946112000-06-29 18:03:25 +0000318
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000319 case SRE_AT_BEGINNING_LINE:
320 return ((void*) ptr == state->beginning ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200321 SRE_IS_LINEBREAK((int) SRE_CHARGET(state, ptr, -1)));
Fredrik Lundh80946112000-06-29 18:03:25 +0000322
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000323 case SRE_AT_END:
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 return (((void*) (ptr+state->charsize) == state->end &&
325 SRE_IS_LINEBREAK((int) SRE_CHARGET(state, ptr, 0))) ||
Fredrik Lundhef34bd22000-06-30 21:40:20 +0000326 ((void*) ptr == state->end));
Fredrik Lundh80946112000-06-29 18:03:25 +0000327
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000328 case SRE_AT_END_LINE:
329 return ((void*) ptr == state->end ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 SRE_IS_LINEBREAK((int) SRE_CHARGET(state, ptr, 0)));
Fredrik Lundh80946112000-06-29 18:03:25 +0000331
Fredrik Lundh770617b2001-01-14 15:06:11 +0000332 case SRE_AT_END_STRING:
333 return ((void*) ptr == state->end);
334
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000335 case SRE_AT_BOUNDARY:
336 if (state->beginning == state->end)
337 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000338 thatp = ((void*) ptr > state->beginning) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200339 SRE_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000340 thisp = ((void*) ptr < state->end) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 SRE_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000342 return thisp != thatp;
Fredrik Lundh80946112000-06-29 18:03:25 +0000343
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000344 case SRE_AT_NON_BOUNDARY:
345 if (state->beginning == state->end)
346 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000347 thatp = ((void*) ptr > state->beginning) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 SRE_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000349 thisp = ((void*) ptr < state->end) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200350 SRE_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000351 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000352
353 case SRE_AT_LOC_BOUNDARY:
354 if (state->beginning == state->end)
355 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000356 thatp = ((void*) ptr > state->beginning) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 SRE_LOC_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000358 thisp = ((void*) ptr < state->end) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200359 SRE_LOC_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000360 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000361
362 case SRE_AT_LOC_NON_BOUNDARY:
363 if (state->beginning == state->end)
364 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000365 thatp = ((void*) ptr > state->beginning) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 SRE_LOC_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000367 thisp = ((void*) ptr < state->end) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 SRE_LOC_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000369 return thisp == thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000370
371 case SRE_AT_UNI_BOUNDARY:
372 if (state->beginning == state->end)
373 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000374 thatp = ((void*) ptr > state->beginning) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 SRE_UNI_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000376 thisp = ((void*) ptr < state->end) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 SRE_UNI_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000378 return thisp != thatp;
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000379
380 case SRE_AT_UNI_NON_BOUNDARY:
381 if (state->beginning == state->end)
382 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000383 thatp = ((void*) ptr > state->beginning) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200384 SRE_UNI_IS_WORD((int) SRE_CHARGET(state, ptr, -1)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000385 thisp = ((void*) ptr < state->end) ?
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200386 SRE_UNI_IS_WORD((int) SRE_CHARGET(state, ptr, 0)) : 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000387 return thisp == thatp;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +0000388
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000389 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000390
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000391 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000392}
393
394LOCAL(int)
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000395SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
Guido van Rossumb700df92000-03-31 14:59:30 +0000396{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000397 /* check if character is a member of the given set */
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 int ok = 1;
Guido van Rossumb700df92000-03-31 14:59:30 +0000400
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000401 for (;;) {
402 switch (*set++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000403
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000404 case SRE_OP_FAILURE:
405 return !ok;
406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 case SRE_OP_LITERAL:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000408 /* <LITERAL> <code> */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 if (ch == set[0])
410 return ok;
411 set++;
412 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000413
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000414 case SRE_OP_CATEGORY:
415 /* <CATEGORY> <code> */
416 if (sre_category(set[0], (int) ch))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000417 return ok;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000418 set += 1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000419 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000420
Fredrik Lundh3562f112000-07-02 12:00:07 +0000421 case SRE_OP_CHARSET:
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000422 if (sizeof(SRE_CODE) == 2) {
423 /* <CHARSET> <bitmap> (16 bits per code word) */
424 if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
425 return ok;
426 set += 16;
Tim Peters3d563502006-01-21 02:47:53 +0000427 }
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000428 else {
429 /* <CHARSET> <bitmap> (32 bits per code word) */
430 if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
431 return ok;
432 set += 8;
433 }
Fredrik Lundh3562f112000-07-02 12:00:07 +0000434 break;
435
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000436 case SRE_OP_RANGE:
437 /* <RANGE> <lower> <upper> */
438 if (set[0] <= ch && ch <= set[1])
439 return ok;
440 set += 2;
441 break;
442
443 case SRE_OP_NEGATE:
444 ok = !ok;
445 break;
446
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000447 case SRE_OP_BIGCHARSET:
448 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
449 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000450 Py_ssize_t count, block;
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000451 count = *(set++);
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000452
453 if (sizeof(SRE_CODE) == 2) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 block = ((char*)set)[ch >> 8];
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000455 set += 128;
456 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
457 return ok;
458 set += count*16;
459 }
460 else {
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000461 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
462 * warnings when c's type supports only numbers < N+1 */
463 if (!(ch & ~65535))
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 block = ((char*)set)[ch >> 8];
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000465 else
466 block = -1;
467 set += 64;
Tim Peters3d563502006-01-21 02:47:53 +0000468 if (block >=0 &&
Martin v. Löwis78e2f062003-04-19 12:56:08 +0000469 (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
470 return ok;
471 set += count*8;
472 }
Fredrik Lundhf71ae462001-07-02 17:04:48 +0000473 break;
474 }
Fredrik Lundh19af43d2001-07-02 16:58:38 +0000475
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000476 default:
477 /* internal error -- there's not much we can do about it
Fredrik Lundh80946112000-06-29 18:03:25 +0000478 here, so let's just pretend it didn't match... */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000479 return 0;
480 }
481 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000482}
483
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000484LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000485
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000486LOCAL(Py_ssize_t)
487SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000488{
489 SRE_CODE chr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200490 char* ptr = (char *)state->ptr;
491 char* end = (char *)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000492 Py_ssize_t i;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000493
494 /* adjust end */
495 if (maxcount < end - ptr && maxcount != 65535)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200496 end = ptr + maxcount*state->charsize;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497
498 switch (pattern[0]) {
499
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000500 case SRE_OP_IN:
501 /* repeated set */
502 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
Victor Stinner63ab8752011-11-22 03:31:20 +0100503 while (ptr < end &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200504 SRE_CHARSET(pattern + 2, SRE_CHARGET(state, ptr, 0)))
505 ptr += state->charsize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000506 break;
507
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000508 case SRE_OP_ANY:
509 /* repeated dot wildcard. */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000510 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200511 while (ptr < end && !SRE_IS_LINEBREAK(SRE_CHARGET(state, ptr, 0)))
512 ptr += state->charsize;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000513 break;
514
515 case SRE_OP_ANY_ALL:
Gustavo Niemeyer0506c642004-09-03 18:11:59 +0000516 /* repeated dot wildcard. skip to the end of the target
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000517 string, and backtrack from there */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000518 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000519 ptr = end;
520 break;
521
522 case SRE_OP_LITERAL:
523 /* repeated literal */
524 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000525 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200526 while (ptr < end && (SRE_CODE) SRE_CHARGET(state, ptr, 0) == chr)
527 ptr += state->charsize;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000528 break;
529
530 case SRE_OP_LITERAL_IGNORE:
531 /* repeated literal */
532 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000533 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200534 while (ptr < end && (SRE_CODE) state->lower(SRE_CHARGET(state, ptr, 0)) == chr)
535 ptr += state->charsize;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000536 break;
537
538 case SRE_OP_NOT_LITERAL:
539 /* repeated non-literal */
540 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000541 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200542 while (ptr < end && (SRE_CODE) SRE_CHARGET(state, ptr, 0) != chr)
543 ptr += state->charsize;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000544 break;
Tim Peters3d563502006-01-21 02:47:53 +0000545
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000546 case SRE_OP_NOT_LITERAL_IGNORE:
547 /* repeated non-literal */
548 chr = pattern[1];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000549 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200550 while (ptr < end && (SRE_CODE) state->lower(SRE_CHARGET(state, ptr, 0)) != chr)
551 ptr += state->charsize;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000552 break;
553
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000554 default:
555 /* repeated single character pattern */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000556 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200557 while ((char*) state->ptr < end) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000558 i = SRE_MATCH(state, pattern);
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000559 if (i < 0)
560 return i;
561 if (!i)
562 break;
563 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000564 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200565 ((char*)state->ptr - ptr)/state->charsize));
566 return ((char*)state->ptr - ptr)/state->charsize;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000567 }
568
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200569 TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, (ptr - (char*) state->ptr)/state->charsize));
570 return (ptr - (char*) state->ptr)/state->charsize;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000571}
572
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* check if an SRE_OP_INFO block matches at the current position.
       returns the number of SRE_CODE objects to skip if successful, 0
       if no match */

    char* end = state->end;
    char* ptr = state->ptr;
    Py_ssize_t i;

    /* check minimal length.  end - ptr is a byte distance, so it is
       converted to characters before comparing.
       NOTE(review): assumes pattern[3] is a length in characters --
       confirm against the compiler that emits INFO blocks */
    if (pattern[3] && (end - ptr) / state->charsize < pattern[3])
        return 0;

    /* check known prefix */
    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
        /* <length> <skip> <prefix data> <overlap data> */
        for (i = 0; i < pattern[5]; i++)
            if ((SRE_CODE) SRE_CHARGET(state, ptr, i) != pattern[7 + i])
                return 0;
        return pattern[0] + 2 * pattern[6];
    }
    return pattern[0];
}
#endif

/* LASTMARK_SAVE() / LASTMARK_RESTORE()
 *
 * Copy state->lastmark and state->lastindex into the current match
 * context (ctx), and copy them back again.  Use the pair to protect
 * recursive SRE_MATCH() calls that *failed* and do *not* return
 * immediately (in other words, those after which we backtrack).
 * Case by case:
 *
 * - The recursive SRE_MATCH() returned true: this is normally a success
 *   (apart from atypical cases such as ASSERT_NOT), so there is no
 *   reason to restore lastmark.
 *
 * - The recursive SRE_MATCH() returned false but the current SRE_MATCH()
 *   is itself returning to its caller: if the current SRE_MATCH() is the
 *   top of the recursion, returning false is a matching failure and it
 *   does not matter where lastmark points.  If it is *not* the top, the
 *   return is a recursive SRE_MATCH() failure in its own right, and the
 *   calling SRE_MATCH() handles it by these same rules (restoring
 *   lastmark itself if necessary).
 *
 * - The recursive SRE_MATCH() returned false and execution continues in
 *   the outer 'for' loop: lastmark must be restored when breaking out,
 *   since the next OP could depend on it.
 *
 * - The recursive SRE_MATCH() returned false and will be called again
 *   inside a local for/while loop: lastmark must be restored between
 *   iterations, since the recursive SRE_MATCH() could do anything and
 *   could depend on lastmark.
 *
 * For more information, check the discussion at SF patch #712900.
 */
#define LASTMARK_SAVE()                         \
    do {                                        \
        ctx->lastindex = state->lastindex;      \
        ctx->lastmark = state->lastmark;        \
    } while (0)
#define LASTMARK_RESTORE()                      \
    do {                                        \
        state->lastindex = ctx->lastindex;      \
        state->lastmark = ctx->lastmark;        \
    } while (0)
639
/* Exit helpers for a function that keeps its result in a local `ret`
 * and has an `exit:` label.
 *
 *   RETURN_ERROR(i)      - return i from the function immediately
 *   RETURN_FAILURE       - set ret = 0 and jump to exit
 *   RETURN_SUCCESS       - set ret = 1 and jump to exit
 *   RETURN_ON_ERROR(i)   - RETURN_ERROR(i) when i < 0
 *   RETURN_ON_SUCCESS(i) - as RETURN_ON_ERROR, then RETURN_SUCCESS when i > 0
 *   RETURN_ON_FAILURE(i) - as RETURN_ON_ERROR, then RETURN_FAILURE when i == 0
 *
 * The argument is parenthesized at every use so that an expression such
 * as `c ? a : b` is compared as a whole.  It is still evaluated more
 * than once, so pass a plain variable or a side-effect-free expression.
 */
#define RETURN_ERROR(i) do { return (i); } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)

#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
650
/* Turn a macro argument into a string literal (used to name types in
   TRACE output). */
#define SFY(x) #x

/* Data stack helpers.
 *
 * The data stack is the byte buffer state->data_stack; data_stack_size
 * is its capacity and data_stack_base the offset of the first free byte.
 * All of these macros expect the following names in the enclosing scope:
 * alloc_pos, ctx_pos, ctx (and TRACE).
 *
 * Whenever data_stack_grow() has to be called, ctx is looked up again
 * from ctx_pos afterwards, because the buffer (and with it every context
 * stored inside it) may have been reallocated.  A negative result from
 * data_stack_grow() is returned from the enclosing function.
 *
 * Macro arguments are parenthesized at each use, so a compound `size`
 * such as `a + b` yields the intended offsets.
 */

/* Reserve sizeof(type) bytes at the top of the stack and point `ptr` at
   them; the offset of the new object is left in alloc_pos. */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = (state)->data_stack_base; \
    TRACE(("allocating %s in %d (%d)\n", \
           SFY(type), alloc_pos, sizeof(type))); \
    if ((state)->data_stack_size < alloc_pos+sizeof(type)) { \
        int j = data_stack_grow((state), sizeof(type)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT((state), SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    (ptr) = (type*)((state)->data_stack+alloc_pos); \
    (state)->data_stack_base += sizeof(type); \
} while (0)

/* Point `ptr` at the object of the given type stored at offset `pos`. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %d\n", SFY(type), (pos))); \
    (ptr) = (type*)((state)->data_stack+(pos)); \
} while (0)

/* Copy `size` bytes from `data` onto the top of the stack. */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %d (%d)\n", \
           (data), (state)->data_stack_base, (size))); \
    if ((state)->data_stack_size < (state)->data_stack_base+(size)) { \
        int j = data_stack_grow((state), (size)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT((state), SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    memcpy((state)->data_stack+(state)->data_stack_base, (data), (size)); \
    (state)->data_stack_base += (size); \
} while (0)

/* Copy the top `size` bytes of the stack into `data`; the bytes are
   removed from the stack only when `discard` is true. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %d (%d)\n", \
           (data), (state)->data_stack_base-(size), (size))); \
    memcpy((data), (state)->data_stack+(state)->data_stack_base-(size), (size)); \
    if (discard) \
        (state)->data_stack_base -= (size); \
} while (0)

/* Drop the top `size` bytes of the stack without copying them. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %d (%d)\n", \
           (state)->data_stack_base-(size), (size))); \
    (state)->data_stack_base -= (size); \
} while(0)

/* Short forms that operate on the `state` variable in scope; for the
   push/pop variants the size is taken from the pointed-to object. */
#define DATA_PUSH(x) \
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x) \
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
    DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
714
/* Save and restore the group marks on the data stack.
 *
 * Each macro is a no-op unless lastmark > 0; otherwise it transfers the
 * first (lastmark+1) pointer-sized entries of state->mark:
 *
 *   MARK_PUSH         - copy the marks onto the data stack
 *   MARK_POP          - copy them back into state->mark and remove them
 *   MARK_POP_KEEP     - copy them back but leave them on the stack
 *   MARK_POP_DISCARD  - remove them without copying
 *
 * MARK_PUSH needs a local integer `i` in the enclosing scope.  The
 * argument is parenthesized so that any expression can be passed.
 */
#define MARK_PUSH(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            i = (lastmark); /* ctx->lastmark may change if reallocated */ \
            DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
        } \
    } while (0)
#define MARK_POP(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP(state, state->mark, ((lastmark)+1)*sizeof(void*), 1); \
        } \
    } while (0)
#define MARK_POP_KEEP(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP(state, state->mark, ((lastmark)+1)*sizeof(void*), 0); \
        } \
    } while (0)
#define MARK_POP_DISCARD(lastmark) \
    do { \
        if ((lastmark) > 0) { \
            DATA_STACK_POP_DISCARD(state, ((lastmark)+1)*sizeof(void*)); \
        } \
    } while (0)
732
/* Values stored in SRE_MATCH_CONTEXT.jump.  DO_JUMP records one of these
   in every context it creates, identifying the DO_JUMP site that created
   it; the outermost context of SRE_MATCH uses JUMP_NONE. */
#define JUMP_NONE            0

/* MAX_UNTIL / MIN_UNTIL */
#define JUMP_MAX_UNTIL_1     1
#define JUMP_MAX_UNTIL_2     2
#define JUMP_MAX_UNTIL_3     3
#define JUMP_MIN_UNTIL_1     4
#define JUMP_MIN_UNTIL_2     5
#define JUMP_MIN_UNTIL_3     6

/* REPEAT, REPEAT_ONE, MIN_REPEAT_ONE */
#define JUMP_REPEAT          7
#define JUMP_REPEAT_ONE_1    8
#define JUMP_REPEAT_ONE_2    9
#define JUMP_MIN_REPEAT_ONE  10

/* BRANCH, ASSERT, ASSERT_NOT */
#define JUMP_BRANCH          11
#define JUMP_ASSERT          12
#define JUMP_ASSERT_NOT      13
747
/* DO_JUMP(jumpvalue, jumplabel, nextpattern)
 *
 * Start matching `nextpattern` in a fresh context without a C-level
 * recursive call:
 *
 *   1. allocate a new SRE_MATCH_CONTEXT on the data stack (this may
 *      relocate ctx, so `nextpattern` is evaluated only afterwards);
 *   2. record in it the position of the current context, the jump
 *      value and the pattern to run;
 *   3. make it the current context and go to `entrance`.
 *
 * `jumplabel` is defined right after the goto, so that is where the code
 * following the macro begins.  NOTE(review): presumably the exit path of
 * SRE_MATCH uses ctx->jump to come back to this label with the result in
 * `ret` -- that code is outside this excerpt.
 *
 * This expands to several statements plus a label and is deliberately
 * not wrapped in do { } while (0); never use it as the unbraced body of
 * an if/else or a loop.
 */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern)  \
    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx);         \
    nextctx->last_ctx_pos = ctx_pos;                \
    nextctx->jump = jumpvalue;                      \
    nextctx->pattern = nextpattern;                 \
    ctx_pos = alloc_pos;                            \
    ctx = nextctx;                                  \
    goto entrance;                                  \
    jumplabel:                                      \
    while (0) /* gcc doesn't like labels at end of scopes */
758
759typedef struct {
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000760 Py_ssize_t last_ctx_pos;
761 Py_ssize_t jump;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200762 char* ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000763 SRE_CODE* pattern;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000764 Py_ssize_t count;
765 Py_ssize_t lastmark;
766 Py_ssize_t lastindex;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000767 union {
768 SRE_CODE chr;
769 SRE_REPEAT* rep;
770 } u;
771} SRE_MATCH_CONTEXT;
772
773/* check if string matches the given pattern. returns <0 for
774 error, 0 for failure, and 1 for success */
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000775LOCAL(Py_ssize_t)
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000776SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
Guido van Rossumb700df92000-03-31 14:59:30 +0000777{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200778 char* end = (char*)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000779 Py_ssize_t alloc_pos, ctx_pos = -1;
780 Py_ssize_t i, ret = 0;
781 Py_ssize_t jump;
Christian Heimes2380ac72008-01-09 00:17:24 +0000782 unsigned int sigcount=0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000783
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000784 SRE_MATCH_CONTEXT* ctx;
785 SRE_MATCH_CONTEXT* nextctx;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000786
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +0000787 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000788
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000789 DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
790 ctx->last_ctx_pos = -1;
791 ctx->jump = JUMP_NONE;
792 ctx->pattern = pattern;
793 ctx_pos = alloc_pos;
794
795entrance:
796
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200797 ctx->ptr = (char *)state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000798
799 if (ctx->pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000800 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000801 /* <INFO> <1=skip> <2=flags> <3=min> ... */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200802 if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000803 TRACE(("reject (got %d chars, need %d)\n",
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000804 (end - ctx->ptr), ctx->pattern[3]));
805 RETURN_FAILURE;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000806 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000807 ctx->pattern += ctx->pattern[1] + 1;
Fredrik Lundh29c08be2000-06-29 23:33:12 +0000808 }
809
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000810 for (;;) {
Christian Heimes2380ac72008-01-09 00:17:24 +0000811 ++sigcount;
812 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
813 RETURN_ERROR(SRE_ERROR_INTERRUPTED);
Guido van Rossumb700df92000-03-31 14:59:30 +0000814
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000815 switch (*ctx->pattern++) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000816
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000817 case SRE_OP_MARK:
818 /* set mark */
819 /* <MARK> <gid> */
820 TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
821 ctx->ptr, ctx->pattern[0]));
822 i = ctx->pattern[0];
823 if (i & 1)
824 state->lastindex = i/2 + 1;
825 if (i > state->lastmark) {
826 /* state->lastmark is the highest valid index in the
827 state->mark array. If it is increased by more than 1,
828 the intervening marks must be set to NULL to signal
Tim Peters3d563502006-01-21 02:47:53 +0000829 that these marks have not been encountered. */
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000830 Py_ssize_t j = state->lastmark + 1;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000831 while (j < i)
832 state->mark[j++] = NULL;
833 state->lastmark = i;
834 }
835 state->mark[i] = ctx->ptr;
836 ctx->pattern++;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000837 break;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000838
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000839 case SRE_OP_LITERAL:
840 /* match literal string */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000841 /* <LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000842 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
843 ctx->ptr, *ctx->pattern));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200844 if (ctx->ptr >= end || (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0) != ctx->pattern[0])
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000845 RETURN_FAILURE;
846 ctx->pattern++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200847 ctx->ptr += state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000848 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000849
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000850 case SRE_OP_NOT_LITERAL:
851 /* match anything that is not literal character */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000852 /* <NOT_LITERAL> <code> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000853 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
854 ctx->ptr, *ctx->pattern));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200855 if (ctx->ptr >= end || (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0) == ctx->pattern[0])
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000856 RETURN_FAILURE;
857 ctx->pattern++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200858 ctx->ptr += state->charsize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000859 break;
860
861 case SRE_OP_SUCCESS:
862 /* end of pattern */
863 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
864 state->ptr = ctx->ptr;
865 RETURN_SUCCESS;
866
867 case SRE_OP_AT:
868 /* match at given position */
869 /* <AT> <code> */
870 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
871 if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
872 RETURN_FAILURE;
873 ctx->pattern++;
874 break;
875
876 case SRE_OP_CATEGORY:
877 /* match at given category */
878 /* <CATEGORY> <code> */
879 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
880 ctx->ptr, *ctx->pattern));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200881 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], SRE_CHARGET(state, ctx->ptr, 0)))
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000882 RETURN_FAILURE;
883 ctx->pattern++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200884 ctx->ptr += state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000885 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000886
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000887 case SRE_OP_ANY:
Fredrik Lundhe1869832000-08-01 22:47:49 +0000888 /* match anything (except a newline) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000889 /* <ANY> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000890 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200891 if (ctx->ptr >= end || SRE_IS_LINEBREAK(SRE_CHARGET(state, ctx->ptr, 0)))
892 RETURN_FAILURE;
893 ctx->ptr += state->charsize;
Fredrik Lundhe1869832000-08-01 22:47:49 +0000894 break;
895
896 case SRE_OP_ANY_ALL:
897 /* match anything */
898 /* <ANY_ALL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000899 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
900 if (ctx->ptr >= end)
901 RETURN_FAILURE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200902 ctx->ptr += state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000903 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000904
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000905 case SRE_OP_IN:
906 /* match set member (or non_member) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000907 /* <IN> <skip> <set> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000908 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200909 if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, SRE_CHARGET(state, ctx->ptr, 0)))
910 RETURN_FAILURE;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000911 ctx->pattern += ctx->pattern[0];
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200912 ctx->ptr += state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000913 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000914
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000915 case SRE_OP_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000916 TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
917 ctx->pattern, ctx->ptr, ctx->pattern[0]));
918 if (ctx->ptr >= end ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200919 state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*ctx->pattern))
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000920 RETURN_FAILURE;
921 ctx->pattern++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200922 ctx->ptr += state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000923 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000924
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000925 case SRE_OP_NOT_LITERAL_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000926 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
927 ctx->pattern, ctx->ptr, *ctx->pattern));
928 if (ctx->ptr >= end ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200929 state->lower(SRE_CHARGET(state, ctx->ptr, 0)) == state->lower(*ctx->pattern))
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000930 RETURN_FAILURE;
931 ctx->pattern++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200932 ctx->ptr += state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000933 break;
Guido van Rossumb700df92000-03-31 14:59:30 +0000934
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000935 case SRE_OP_IN_IGNORE:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000936 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
937 if (ctx->ptr >= end
938 || !SRE_CHARSET(ctx->pattern+1,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200939 (SRE_CODE)state->lower(SRE_CHARGET(state, ctx->ptr, 0))))
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000940 RETURN_FAILURE;
941 ctx->pattern += ctx->pattern[0];
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200942 ctx->ptr += state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000943 break;
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000944
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000945 case SRE_OP_JUMP:
946 case SRE_OP_INFO:
947 /* jump forward */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000948 /* <JUMP> <offset> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000949 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
950 ctx->ptr, ctx->pattern[0]));
951 ctx->pattern += ctx->pattern[0];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000952 break;
953
954 case SRE_OP_BRANCH:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000955 /* alternation */
956 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000957 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000958 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000959 ctx->u.rep = state->repeat;
960 if (ctx->u.rep)
961 MARK_PUSH(ctx->lastmark);
962 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
963 if (ctx->pattern[1] == SRE_OP_LITERAL &&
964 (ctx->ptr >= end ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200965 (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0) != ctx->pattern[2]))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000966 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000967 if (ctx->pattern[1] == SRE_OP_IN &&
968 (ctx->ptr >= end ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200969 !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0))))
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000970 continue;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000971 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000972 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000973 if (ret) {
974 if (ctx->u.rep)
975 MARK_POP_DISCARD(ctx->lastmark);
976 RETURN_ON_ERROR(ret);
977 RETURN_SUCCESS;
Gustavo Niemeyerc34f2552003-04-27 12:34:14 +0000978 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000979 if (ctx->u.rep)
980 MARK_POP_KEEP(ctx->lastmark);
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +0000981 LASTMARK_RESTORE();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000982 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000983 if (ctx->u.rep)
984 MARK_POP_DISCARD(ctx->lastmark);
985 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +0000986
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000987 case SRE_OP_REPEAT_ONE:
988 /* match repeated sequence (maximizing regexp) */
989
990 /* this operator only works if the repeated item is
991 exactly one character wide, and we're not already
992 collecting backtracking points. for other cases,
Fredrik Lundh770617b2001-01-14 15:06:11 +0000993 use the MAX_REPEAT operator */
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000994
995 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
996
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000997 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
998 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +0000999
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001000 if (ctx->ptr + state->charsize * ctx->pattern[1] > end)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001001 RETURN_FAILURE; /* cannot match */
Fredrik Lundhe1869832000-08-01 22:47:49 +00001002
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001003 state->ptr = ctx->ptr;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001004
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001005 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1006 RETURN_ON_ERROR(ret);
1007 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1008 ctx->count = ret;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001009 ctx->ptr += state->charsize * ctx->count;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001010
1011 /* when we arrive here, count contains the number of
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001012 matches, and ctx->ptr points to the tail of the target
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001013 string. check if the rest of the pattern matches,
1014 and backtrack if not. */
1015
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001016 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001017 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001018
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001019 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001020 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001021 state->ptr = ctx->ptr;
1022 RETURN_SUCCESS;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001023 }
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001024
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001025 LASTMARK_SAVE();
1026
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001027 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001028 /* tail starts with a literal. skip positions where
1029 the rest of the pattern cannot possibly match */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001030 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001031 for (;;) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001032 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
Victor Stinner63ab8752011-11-22 03:31:20 +01001033 (ctx->ptr >= end ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001034 SRE_CHARGET(state, ctx->ptr, 0) != ctx->u.chr)) {
1035 ctx->ptr -= state->charsize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001036 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001037 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001038 if (ctx->count < (Py_ssize_t) ctx->pattern[1])
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001039 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001040 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001041 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1042 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001043 if (ret) {
1044 RETURN_ON_ERROR(ret);
1045 RETURN_SUCCESS;
1046 }
Tim Peters3d563502006-01-21 02:47:53 +00001047
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001048 LASTMARK_RESTORE();
Tim Peters3d563502006-01-21 02:47:53 +00001049
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001050 ctx->ptr -= state->charsize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001051 ctx->count--;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001052 }
1053
1054 } else {
1055 /* general case */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001056 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001057 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001058 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1059 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001060 if (ret) {
1061 RETURN_ON_ERROR(ret);
1062 RETURN_SUCCESS;
1063 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001064 ctx->ptr -= state->charsize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001065 ctx->count--;
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001066 LASTMARK_RESTORE();
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001067 }
1068 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001069 RETURN_FAILURE;
Fredrik Lundh2f2c67d2000-08-01 21:05:41 +00001070
Guido van Rossum41c99e72003-04-14 17:59:34 +00001071 case SRE_OP_MIN_REPEAT_ONE:
1072 /* match repeated sequence (minimizing regexp) */
1073
1074 /* this operator only works if the repeated item is
1075 exactly one character wide, and we're not already
1076 collecting backtracking points. for other cases,
1077 use the MIN_REPEAT operator */
1078
1079 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1080
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001081 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1082 ctx->pattern[1], ctx->pattern[2]));
Guido van Rossum41c99e72003-04-14 17:59:34 +00001083
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001084 if (ctx->ptr + state->charsize * ctx->pattern[1] > end)
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001085 RETURN_FAILURE; /* cannot match */
Guido van Rossum41c99e72003-04-14 17:59:34 +00001086
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001087 state->ptr = ctx->ptr;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001088
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001089 if (ctx->pattern[1] == 0)
1090 ctx->count = 0;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001091 else {
1092 /* count using pattern min as the maximum */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001093 ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1094 RETURN_ON_ERROR(ret);
1095 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001096 if (ret < (Py_ssize_t) ctx->pattern[1])
Tim Peters3d563502006-01-21 02:47:53 +00001097 /* didn't match minimum number of times */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001098 RETURN_FAILURE;
1099 /* advance past minimum matches of repeat */
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001100 ctx->count = ret;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001101 ctx->ptr += state->charsize * ctx->count;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001102 }
1103
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001104 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
Guido van Rossum41c99e72003-04-14 17:59:34 +00001105 /* tail is empty. we're finished */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001106 state->ptr = ctx->ptr;
1107 RETURN_SUCCESS;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001108
1109 } else {
1110 /* general case */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00001111 LASTMARK_SAVE();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001112 while ((Py_ssize_t)ctx->pattern[2] == 65535
1113 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001114 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001115 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1116 ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001117 if (ret) {
1118 RETURN_ON_ERROR(ret);
1119 RETURN_SUCCESS;
1120 }
1121 state->ptr = ctx->ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001122 ret = SRE_COUNT(state, ctx->pattern+3, 1);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001123 RETURN_ON_ERROR(ret);
Gustavo Niemeyer166878f2004-12-02 16:15:39 +00001124 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001125 if (ret == 0)
Guido van Rossum41c99e72003-04-14 17:59:34 +00001126 break;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001127 assert(ret == 1);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001128 ctx->ptr += state->charsize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001129 ctx->count++;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001130 LASTMARK_RESTORE();
Guido van Rossum41c99e72003-04-14 17:59:34 +00001131 }
Guido van Rossum41c99e72003-04-14 17:59:34 +00001132 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001133 RETURN_FAILURE;
Guido van Rossum41c99e72003-04-14 17:59:34 +00001134
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001135 case SRE_OP_REPEAT:
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001136 /* create repeat context. all the hard work is done
Fredrik Lundh770617b2001-01-14 15:06:11 +00001137 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001138 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001139 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1140 ctx->pattern[1], ctx->pattern[2]));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001141
1142 /* install new repeat context */
Thomas Wouters477c8d52006-05-27 19:21:47 +00001143 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
Thomas Wouters89f507f2006-12-13 04:49:30 +00001144 if (!ctx->u.rep) {
1145 PyErr_NoMemory();
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001146 RETURN_FAILURE;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001147 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001148 ctx->u.rep->count = -1;
1149 ctx->u.rep->pattern = ctx->pattern;
1150 ctx->u.rep->prev = state->repeat;
1151 ctx->u.rep->last_ptr = NULL;
1152 state->repeat = ctx->u.rep;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001153
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001154 state->ptr = ctx->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001155 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001156 state->repeat = ctx->u.rep->prev;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001157 PyObject_FREE(ctx->u.rep);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001158
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001159 if (ret) {
1160 RETURN_ON_ERROR(ret);
1161 RETURN_SUCCESS;
1162 }
1163 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001164
1165 case SRE_OP_MAX_UNTIL:
1166 /* maximizing repeat */
1167 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1168
1169 /* FIXME: we probably need to deal with zero-width
1170 matches in here... */
1171
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001172 ctx->u.rep = state->repeat;
1173 if (!ctx->u.rep)
1174 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001175
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001176 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001177
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001178 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001179
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001180 TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1181 ctx->ptr, ctx->count));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001182
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001183 if (ctx->count < ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001184 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001185 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001186 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1187 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001188 if (ret) {
1189 RETURN_ON_ERROR(ret);
1190 RETURN_SUCCESS;
1191 }
1192 ctx->u.rep->count = ctx->count-1;
1193 state->ptr = ctx->ptr;
1194 RETURN_FAILURE;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001195 }
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001196
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001197 if ((ctx->count < ctx->u.rep->pattern[2] ||
1198 ctx->u.rep->pattern[2] == 65535) &&
1199 state->ptr != ctx->u.rep->last_ptr) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001200 /* we may have enough matches, but if we can
1201 match another item, do so */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001202 ctx->u.rep->count = ctx->count;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001203 LASTMARK_SAVE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001204 MARK_PUSH(ctx->lastmark);
1205 /* zero-width match protection */
1206 DATA_PUSH(&ctx->u.rep->last_ptr);
1207 ctx->u.rep->last_ptr = state->ptr;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001208 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1209 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001210 DATA_POP(&ctx->u.rep->last_ptr);
1211 if (ret) {
1212 MARK_POP_DISCARD(ctx->lastmark);
1213 RETURN_ON_ERROR(ret);
1214 RETURN_SUCCESS;
1215 }
1216 MARK_POP(ctx->lastmark);
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001217 LASTMARK_RESTORE();
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001218 ctx->u.rep->count = ctx->count-1;
1219 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001220 }
1221
1222 /* cannot match more repeated items here. make sure the
1223 tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001224 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001225 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001226 RETURN_ON_SUCCESS(ret);
1227 state->repeat = ctx->u.rep;
1228 state->ptr = ctx->ptr;
1229 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001230
1231 case SRE_OP_MIN_UNTIL:
1232 /* minimizing repeat */
1233 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1234
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001235 ctx->u.rep = state->repeat;
1236 if (!ctx->u.rep)
1237 RETURN_ERROR(SRE_ERROR_STATE);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001238
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001239 state->ptr = ctx->ptr;
Gustavo Niemeyer3c9068b2003-04-22 15:39:09 +00001240
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001241 ctx->count = ctx->u.rep->count+1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001242
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001243 TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1244 ctx->ptr, ctx->count, ctx->u.rep->pattern));
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001245
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001246 if (ctx->count < ctx->u.rep->pattern[1]) {
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001247 /* not enough matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001248 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001249 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1250 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001251 if (ret) {
1252 RETURN_ON_ERROR(ret);
1253 RETURN_SUCCESS;
1254 }
1255 ctx->u.rep->count = ctx->count-1;
1256 state->ptr = ctx->ptr;
1257 RETURN_FAILURE;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001258 }
1259
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001260 LASTMARK_SAVE();
1261
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001262 /* see if the tail matches */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001263 state->repeat = ctx->u.rep->prev;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001264 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001265 if (ret) {
1266 RETURN_ON_ERROR(ret);
1267 RETURN_SUCCESS;
1268 }
Fredrik Lundhfa25a7d2001-01-14 23:55:55 +00001269
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001270 state->repeat = ctx->u.rep;
1271 state->ptr = ctx->ptr;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001272
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001273 LASTMARK_RESTORE();
1274
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001275 if (ctx->count >= ctx->u.rep->pattern[2]
1276 && ctx->u.rep->pattern[2] != 65535)
1277 RETURN_FAILURE;
Gustavo Niemeyercaf1c9d2003-04-27 14:42:54 +00001278
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001279 ctx->u.rep->count = ctx->count;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001280 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1281 ctx->u.rep->pattern+3);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001282 if (ret) {
1283 RETURN_ON_ERROR(ret);
1284 RETURN_SUCCESS;
1285 }
1286 ctx->u.rep->count = ctx->count-1;
1287 state->ptr = ctx->ptr;
1288 RETURN_FAILURE;
1289
1290 case SRE_OP_GROUPREF:
1291 /* match backreference */
1292 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1293 ctx->ptr, ctx->pattern[0]));
1294 i = ctx->pattern[0];
1295 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001296 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001297 if (groupref >= state->lastmark) {
1298 RETURN_FAILURE;
1299 } else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001300 char* p = (char*) state->mark[groupref];
1301 char* e = (char*) state->mark[groupref+1];
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001302 if (!p || !e || e < p)
1303 RETURN_FAILURE;
1304 while (p < e) {
Victor Stinner63ab8752011-11-22 03:31:20 +01001305 if (ctx->ptr >= end ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 SRE_CHARGET(state, ctx->ptr, 0) != SRE_CHARGET(state, p, 0))
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001307 RETURN_FAILURE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 p += state->charsize;
1309 ctx->ptr += state->charsize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001310 }
1311 }
1312 }
1313 ctx->pattern++;
1314 break;
1315
1316 case SRE_OP_GROUPREF_IGNORE:
1317 /* match backreference */
1318 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1319 ctx->ptr, ctx->pattern[0]));
1320 i = ctx->pattern[0];
1321 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001322 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001323 if (groupref >= state->lastmark) {
1324 RETURN_FAILURE;
1325 } else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001326 char* p = (char*) state->mark[groupref];
1327 char* e = (char*) state->mark[groupref+1];
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001328 if (!p || !e || e < p)
1329 RETURN_FAILURE;
1330 while (p < e) {
1331 if (ctx->ptr >= end ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001332 state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p))
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001333 RETURN_FAILURE;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001334 p++;
1335 ctx->ptr += state->charsize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001336 }
1337 }
1338 }
1339 ctx->pattern++;
1340 break;
1341
1342 case SRE_OP_GROUPREF_EXISTS:
1343 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1344 ctx->ptr, ctx->pattern[0]));
1345 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1346 i = ctx->pattern[0];
1347 {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001348 Py_ssize_t groupref = i+i;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001349 if (groupref >= state->lastmark) {
1350 ctx->pattern += ctx->pattern[1];
1351 break;
1352 } else {
1353 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1354 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1355 if (!p || !e || e < p) {
1356 ctx->pattern += ctx->pattern[1];
1357 break;
1358 }
1359 }
1360 }
1361 ctx->pattern += 2;
1362 break;
1363
1364 case SRE_OP_ASSERT:
1365 /* assert subpattern */
1366 /* <ASSERT> <skip> <back> <pattern> */
1367 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1368 ctx->ptr, ctx->pattern[1]));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369 state->ptr = ctx->ptr - state->charsize * ctx->pattern[1];
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001370 if (state->ptr < state->beginning)
1371 RETURN_FAILURE;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001372 DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001373 RETURN_ON_FAILURE(ret);
1374 ctx->pattern += ctx->pattern[0];
1375 break;
1376
1377 case SRE_OP_ASSERT_NOT:
1378 /* assert not subpattern */
1379 /* <ASSERT_NOT> <skip> <back> <pattern> */
1380 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1381 ctx->ptr, ctx->pattern[1]));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001382 state->ptr = ctx->ptr - state->charsize * ctx->pattern[1];
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001383 if (state->ptr >= state->beginning) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001384 DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001385 if (ret) {
1386 RETURN_ON_ERROR(ret);
1387 RETURN_FAILURE;
1388 }
1389 }
1390 ctx->pattern += ctx->pattern[0];
1391 break;
1392
1393 case SRE_OP_FAILURE:
1394 /* immediate failure */
1395 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1396 RETURN_FAILURE;
Guido van Rossumb700df92000-03-31 14:59:30 +00001397
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001398 default:
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001399 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1400 ctx->pattern[-1]));
1401 RETURN_ERROR(SRE_ERROR_ILLEGAL);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001402 }
1403 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001404
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001405exit:
1406 ctx_pos = ctx->last_ctx_pos;
1407 jump = ctx->jump;
1408 DATA_POP_DISCARD(ctx);
1409 if (ctx_pos == -1)
1410 return ret;
1411 DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1412
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001413 switch (jump) {
1414 case JUMP_MAX_UNTIL_2:
1415 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1416 goto jump_max_until_2;
1417 case JUMP_MAX_UNTIL_3:
1418 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1419 goto jump_max_until_3;
1420 case JUMP_MIN_UNTIL_2:
1421 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1422 goto jump_min_until_2;
1423 case JUMP_MIN_UNTIL_3:
1424 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1425 goto jump_min_until_3;
1426 case JUMP_BRANCH:
1427 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1428 goto jump_branch;
1429 case JUMP_MAX_UNTIL_1:
1430 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1431 goto jump_max_until_1;
1432 case JUMP_MIN_UNTIL_1:
1433 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1434 goto jump_min_until_1;
1435 case JUMP_REPEAT:
1436 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1437 goto jump_repeat;
1438 case JUMP_REPEAT_ONE_1:
1439 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1440 goto jump_repeat_one_1;
1441 case JUMP_REPEAT_ONE_2:
1442 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1443 goto jump_repeat_one_2;
1444 case JUMP_MIN_REPEAT_ONE:
1445 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1446 goto jump_min_repeat_one;
1447 case JUMP_ASSERT:
1448 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1449 goto jump_assert;
1450 case JUMP_ASSERT_NOT:
1451 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1452 goto jump_assert_not;
1453 case JUMP_NONE:
1454 TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
1455 break;
1456 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001457
1458 return ret; /* should never get here */
Guido van Rossumb700df92000-03-31 14:59:30 +00001459}
1460
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001461LOCAL(Py_ssize_t)
Guido van Rossumb700df92000-03-31 14:59:30 +00001462SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1463{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001464 char* ptr = (char*)state->start;
1465 char* end = (char*)state->end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001466 Py_ssize_t status = 0;
1467 Py_ssize_t prefix_len = 0;
1468 Py_ssize_t prefix_skip = 0;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001469 SRE_CODE* prefix = NULL;
1470 SRE_CODE* charset = NULL;
1471 SRE_CODE* overlap = NULL;
1472 int flags = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +00001473
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001474 if (pattern[0] == SRE_OP_INFO) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001475 /* optimization info block */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001476 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001477
1478 flags = pattern[2];
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001479
Gustavo Niemeyer28b5bb32003-06-26 14:41:08 +00001480 if (pattern[3] > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001481 /* adjust end point (but make sure we leave at least one
Fredrik Lundh3562f112000-07-02 12:00:07 +00001482 character in there, so literal search will work) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001483 end -= (pattern[3]-1) * state->charsize;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001484 if (end <= ptr)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001485 end = ptr + state->charsize;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001486 }
1487
Fredrik Lundh3562f112000-07-02 12:00:07 +00001488 if (flags & SRE_INFO_PREFIX) {
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001489 /* pattern starts with a known prefix */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001490 /* <length> <skip> <prefix data> <overlap data> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001491 prefix_len = pattern[5];
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001492 prefix_skip = pattern[6];
1493 prefix = pattern + 7;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001494 overlap = prefix + prefix_len - 1;
1495 } else if (flags & SRE_INFO_CHARSET)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +00001496 /* pattern starts with a character from a known set */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001497 /* <charset> */
Fredrik Lundh3562f112000-07-02 12:00:07 +00001498 charset = pattern + 5;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001499
1500 pattern += 1 + pattern[1];
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001501 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001502
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001503 TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1504 TRACE(("charset = %p\n", charset));
1505
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001506#if defined(USE_FAST_SEARCH)
Fredrik Lundh28552902000-07-05 21:14:16 +00001507 if (prefix_len > 1) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001508 /* pattern starts with a known prefix. use the overlap
1509 table to skip forward as fast as we possibly can */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001510 Py_ssize_t i = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001511 end = (char *)state->end;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001512 while (ptr < end) {
1513 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001514 if ((SRE_CODE) SRE_CHARGET(state, ptr, 0) != prefix[i]) {
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001515 if (!i)
1516 break;
1517 else
1518 i = overlap[i];
1519 } else {
1520 if (++i == prefix_len) {
1521 /* found a potential match */
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001522 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001523 state->start = ptr - (prefix_len - 1) * state->charsize;
1524 state->ptr = ptr - (prefix_len - prefix_skip - 1) * state->charsize;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001525 if (flags & SRE_INFO_LITERAL)
1526 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001527 status = SRE_MATCH(state, pattern + 2*prefix_skip);
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001528 if (status != 0)
1529 return status;
1530 /* close but no cigar -- try again */
1531 i = overlap[i];
1532 }
1533 break;
1534 }
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001535 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001536 ptr += state->charsize;
Fredrik Lundh29c08be2000-06-29 23:33:12 +00001537 }
1538 return 0;
1539 }
1540#endif
Fredrik Lundh80946112000-06-29 18:03:25 +00001541
Fredrik Lundh3562f112000-07-02 12:00:07 +00001542 if (pattern[0] == SRE_OP_LITERAL) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001543 /* pattern starts with a literal character. this is used
Fredrik Lundh3562f112000-07-02 12:00:07 +00001544 for short prefixes, and if fast search is disabled */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001545 SRE_CODE chr = pattern[1];
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001546 end = (char*)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001547 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001548 while (ptr < end && (SRE_CODE) SRE_CHARGET(state, ptr, 0) != chr)
1549 ptr += state->charsize;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001550 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001551 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001552 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001553 state->start = ptr;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001554 ptr += state->charsize;
1555 state->ptr = ptr;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001556 if (flags & SRE_INFO_LITERAL)
1557 return 1; /* we got all of it */
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001558 status = SRE_MATCH(state, pattern + 2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001559 if (status != 0)
1560 break;
Fredrik Lundh3562f112000-07-02 12:00:07 +00001561 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001562 } else if (charset) {
1563 /* pattern starts with a character from a known set */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001564 end = (char*)state->end;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001565 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001566 while (ptr < end && !SRE_CHARSET(charset, SRE_CHARGET(state, ptr, 0)))
1567 ptr += state->charsize;
Gustavo Niemeyerc523b042002-11-07 03:28:56 +00001568 if (ptr >= end)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001569 return 0;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001570 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001571 state->start = ptr;
1572 state->ptr = ptr;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001573 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001574 if (status != 0)
1575 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001576 ptr += state->charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001577 }
1578 } else
1579 /* general case */
1580 while (ptr <= end) {
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001581 TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001582 state->start = state->ptr = ptr;
1583 ptr += state->charsize;
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001584 status = SRE_MATCH(state, pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001585 if (status != 0)
1586 break;
1587 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001588
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001589 return status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001590}
Tim Peters3d563502006-01-21 02:47:53 +00001591
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001592#if !defined(SRE_RECURSIVE)
Guido van Rossumb700df92000-03-31 14:59:30 +00001593
1594/* -------------------------------------------------------------------- */
1595/* factories and destructors */
1596
1597/* see sre.h for object declarations */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001598static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001599static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +00001600
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001601static int
1602sre_literal_template(int charsize, char* ptr, Py_ssize_t len)
1603{
1604 /* check if given string is a literal template (i.e. no escapes) */
1605 struct {
1606 int charsize;
1607 } state = {
1608 charsize
1609 };
1610 while (len-- > 0) {
1611 if (SRE_CHARGET((&state), ptr, 0) == '\\')
1612 return 0;
1613 ptr += charsize;
1614 }
1615 return 1;
1616}
1617
Guido van Rossumb700df92000-03-31 14:59:30 +00001618static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001619sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +00001620{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001621 return Py_BuildValue("l", sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +00001622}
1623
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001624static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +00001625sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001626{
1627 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001628 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001629 return NULL;
1630 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001631 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001632 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001633 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +00001634 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001635}
1636
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001637LOCAL(void)
1638state_reset(SRE_STATE* state)
1639{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001640 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001641 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001642
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001643 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001644 state->lastindex = -1;
1645
1646 state->repeat = NULL;
1647
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001648 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001649}
1650
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001651static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001652getstring(PyObject* string, Py_ssize_t* p_length,
Benjamin Peterson33d21a22012-03-07 14:59:13 -06001653 int* p_logical_charsize, int* p_charsize,
1654 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +00001655{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001656 /* given a python object, return a data pointer, a length (in
1657 characters), and a character size. return NULL if the object
1658 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +00001659
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001660 PyBufferProcs *buffer;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001661 Py_ssize_t size, bytes;
1662 int charsize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001663 void* ptr;
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001664
Alexandre Vassalotti70a23712007-10-14 02:05:51 +00001665 /* Unicode objects do not support the buffer API. So, get the data
1666 directly instead. */
1667 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 if (PyUnicode_READY(string) == -1)
1669 return NULL;
1670 ptr = PyUnicode_DATA(string);
1671 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001672 *p_charsize = PyUnicode_KIND(string);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001673 *p_logical_charsize = 4;
Alexandre Vassalotti70a23712007-10-14 02:05:51 +00001674 return ptr;
1675 }
1676
Victor Stinner0058b862011-09-29 03:27:47 +02001677 /* get pointer to byte string buffer */
Benjamin Petersone48944b2012-03-07 14:50:25 -06001678 view->len = -1;
Christian Heimes90aa7642007-12-19 02:45:37 +00001679 buffer = Py_TYPE(string)->tp_as_buffer;
Antoine Pitroufd036452008-08-19 17:56:33 +00001680 if (!buffer || !buffer->bf_getbuffer ||
Benjamin Petersone48944b2012-03-07 14:50:25 -06001681 (*buffer->bf_getbuffer)(string, view, PyBUF_SIMPLE) < 0) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001682 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1683 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001684 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001685
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001686 /* determine buffer size */
Benjamin Petersone48944b2012-03-07 14:50:25 -06001687 bytes = view->len;
1688 ptr = view->buf;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001689
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001690 if (bytes < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001691 PyErr_SetString(PyExc_TypeError, "buffer has negative size");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001692 goto err;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001693 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001694
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001695 /* determine character size */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001696 size = PyObject_Size(string);
Guido van Rossumb700df92000-03-31 14:59:30 +00001697
Christian Heimes72b710a2008-05-26 13:28:38 +00001698 if (PyBytes_Check(string) || bytes == size)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001699 charsize = 1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001700 else {
1701 PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001702 goto err;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00001703 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001704
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001705 *p_length = size;
1706 *p_charsize = charsize;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001707 *p_logical_charsize = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001708
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001709 if (ptr == NULL) {
Antoine Pitroufd036452008-08-19 17:56:33 +00001710 PyErr_SetString(PyExc_ValueError,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001711 "Buffer is NULL");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001712 goto err;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001713 }
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001714 return ptr;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001715 err:
1716 PyBuffer_Release(view);
1717 view->buf = NULL;
1718 return NULL;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001719}
1720
1721LOCAL(PyObject*)
1722state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001723 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001724{
1725 /* prepare state object */
1726
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001727 Py_ssize_t length;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001728 int logical_charsize, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001729 void* ptr;
1730
1731 memset(state, 0, sizeof(SRE_STATE));
1732
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001733 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001734 state->lastindex = -1;
1735
Benjamin Petersone48944b2012-03-07 14:50:25 -06001736 state->buffer.buf = NULL;
Benjamin Peterson33d21a22012-03-07 14:59:13 -06001737 ptr = getstring(string, &length, &logical_charsize, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001738 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -06001739 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001740
Benjamin Peterson33d21a22012-03-07 14:59:13 -06001741 if (logical_charsize == 1 && pattern->logical_charsize > 1) {
Benjamin Petersone48944b2012-03-07 14:50:25 -06001742 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001743 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001744 goto err;
1745 }
Benjamin Peterson33d21a22012-03-07 14:59:13 -06001746 if (logical_charsize > 1 && pattern->logical_charsize == 1) {
Benjamin Petersone48944b2012-03-07 14:50:25 -06001747 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001748 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -06001749 goto err;
1750 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001751
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001752 /* adjust boundaries */
1753 if (start < 0)
1754 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001755 else if (start > length)
1756 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +00001757
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001758 if (end < 0)
1759 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001760 else if (end > length)
1761 end = length;
1762
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001763 state->logical_charsize = logical_charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001764 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +00001765
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001766 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +00001767
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001768 state->start = (void*) ((char*) ptr + start * state->charsize);
1769 state->end = (void*) ((char*) ptr + end * state->charsize);
1770
1771 Py_INCREF(string);
1772 state->string = string;
1773 state->pos = start;
1774 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +00001775
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001776 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001777 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001778 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +00001779 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001780 else
Fredrik Lundhb389df32000-06-29 12:48:37 +00001781 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001782
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001783 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001784 err:
1785 if (state->buffer.buf)
1786 PyBuffer_Release(&state->buffer);
1787 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001788}
1789
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001790LOCAL(void)
1791state_fini(SRE_STATE* state)
1792{
Benjamin Petersone48944b2012-03-07 14:50:25 -06001793 if (state->buffer.buf)
1794 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001795 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001796 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001797}
1798
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001799/* calculate offset from start of string */
1800#define STATE_OFFSET(state, member)\
1801 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1802
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001803LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001804state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001805{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001806 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +00001807
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001808 index = (index - 1) * 2;
1809
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +00001810 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001811 if (empty)
1812 /* want empty string */
1813 i = j = 0;
1814 else {
1815 Py_INCREF(Py_None);
1816 return Py_None;
1817 }
Fredrik Lundh58100642000-08-09 09:14:35 +00001818 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001819 i = STATE_OFFSET(state, state->mark[index]);
1820 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001821 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001822
Fredrik Lundh58100642000-08-09 09:14:35 +00001823 return PySequence_GetSlice(string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001824}
1825
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001826static void
1827pattern_error(int status)
1828{
1829 switch (status) {
1830 case SRE_ERROR_RECURSION_LIMIT:
1831 PyErr_SetString(
1832 PyExc_RuntimeError,
1833 "maximum recursion limit exceeded"
1834 );
1835 break;
1836 case SRE_ERROR_MEMORY:
1837 PyErr_NoMemory();
1838 break;
Christian Heimes2380ac72008-01-09 00:17:24 +00001839 case SRE_ERROR_INTERRUPTED:
1840 /* An exception has already been raised, so let it fly */
1841 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00001842 default:
1843 /* other error codes indicate compiler/engine bugs */
1844 PyErr_SetString(
1845 PyExc_RuntimeError,
1846 "internal error in regular expression engine"
1847 );
1848 }
1849}
1850
Guido van Rossumb700df92000-03-31 14:59:30 +00001851static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001852pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001853{
Raymond Hettinger027bb632004-05-31 03:09:25 +00001854 if (self->weakreflist != NULL)
1855 PyObject_ClearWeakRefs((PyObject *) self);
Benjamin Petersone48944b2012-03-07 14:50:25 -06001856 if (self->view.buf)
1857 PyBuffer_Release(&self->view);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001858 Py_XDECREF(self->pattern);
1859 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +00001860 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001861 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001862}
1863
1864static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001865pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001866{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001867 SRE_STATE state;
1868 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001869
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001870 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001871 Py_ssize_t start = 0;
1872 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001873 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001874 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001875 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001876 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001877
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001878 string = state_init(&state, self, string, start, end);
1879 if (!string)
1880 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001881
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001882 state.ptr = state.start;
1883
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001884 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1885
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001886 if (state.logical_charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001887 status = sre_match(&state, PatternObject_GetCode(self));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001888 } else {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00001889 status = sre_umatch(&state, PatternObject_GetCode(self));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001890 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001891
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001892 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +00001893 if (PyErr_Occurred())
1894 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001895
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001896 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001897
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001898 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001899}
1900
1901static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00001902pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00001903{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001904 SRE_STATE state;
1905 int status;
Guido van Rossumb700df92000-03-31 14:59:30 +00001906
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001907 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001908 Py_ssize_t start = 0;
1909 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001910 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001911 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00001912 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001913 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00001914
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001915 string = state_init(&state, self, string, start, end);
1916 if (!string)
1917 return NULL;
1918
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001919 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
1920
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001921 if (state.logical_charsize == 1) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001922 status = sre_search(&state, PatternObject_GetCode(self));
1923 } else {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001924 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001925 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001926
Fredrik Lundh7898c3e2000-08-07 20:59:04 +00001927 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1928
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001929 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +00001930
Thomas Wouters89f507f2006-12-13 04:49:30 +00001931 if (PyErr_Occurred())
1932 return NULL;
1933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +00001935}
1936
1937static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001938call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001939{
1940 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001941 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001942 PyObject* func;
1943 PyObject* result;
1944
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001945 if (!args)
1946 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +00001947 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001948 if (!name)
1949 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001950 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001951 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001952 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001953 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001954 func = PyObject_GetAttrString(mod, function);
1955 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001956 if (!func)
1957 return NULL;
1958 result = PyObject_CallObject(func, args);
1959 Py_DECREF(func);
1960 Py_DECREF(args);
1961 return result;
1962}
1963
#ifdef USE_BUILTIN_COPY
/* Replace *object with the result of copy.deepcopy(*object, memo).
   Returns 1 on success; returns 0 and leaves *object alone on failure. */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* replacement =
        call("copy", "deepcopy", PyTuple_Pack(2, *object, memo));

    if (replacement == NULL) {
        return 0;
    }

    /* swap in the copy, dropping our reference to the original */
    Py_DECREF(*object);
    *object = replacement;

    return 1; /* success */
}
#endif
1983
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00001984static PyObject*
Thomas Wouters1b7f8912007-09-19 03:06:30 +00001985join_list(PyObject* list, PyObject* string)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001986{
1987 /* join list elements */
1988
1989 PyObject* joiner;
1990#if PY_VERSION_HEX >= 0x01060000
1991 PyObject* function;
1992 PyObject* args;
1993#endif
1994 PyObject* result;
1995
Thomas Wouters1b7f8912007-09-19 03:06:30 +00001996 joiner = PySequence_GetSlice(string, 0, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001997 if (!joiner)
1998 return NULL;
1999
Thomas Wouters1b7f8912007-09-19 03:06:30 +00002000 if (PyList_GET_SIZE(list) == 0) {
2001 Py_DECREF(list);
2002 return joiner;
2003 }
2004
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002005#if PY_VERSION_HEX >= 0x01060000
2006 function = PyObject_GetAttrString(joiner, "join");
2007 if (!function) {
2008 Py_DECREF(joiner);
2009 return NULL;
2010 }
2011 args = PyTuple_New(1);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002012 if (!args) {
2013 Py_DECREF(function);
2014 Py_DECREF(joiner);
2015 return NULL;
2016 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002017 PyTuple_SET_ITEM(args, 0, list);
2018 result = PyObject_CallObject(function, args);
2019 Py_DECREF(args); /* also removes list */
2020 Py_DECREF(function);
2021#else
2022 result = call(
2023 "string", "join",
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002024 PyTuple_Pack(2, list, joiner)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002025 );
2026#endif
2027 Py_DECREF(joiner);
2028
2029 return result;
2030}
2031
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002032static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002033pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002034{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002035 SRE_STATE state;
2036 PyObject* list;
2037 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002038 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +00002039
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002040 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002041 Py_ssize_t start = 0;
2042 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002043 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002044 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +00002045 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002046 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002047
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002048 string = state_init(&state, self, string, start, end);
2049 if (!string)
2050 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002051
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002052 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002053 if (!list) {
2054 state_fini(&state);
2055 return NULL;
2056 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002057
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002058 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +00002059
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +00002061
Fredrik Lundhebc37b22000-10-28 19:30:41 +00002062 state_reset(&state);
2063
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 state.ptr = state.start;
2065
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002066 if (state.logical_charsize == 1) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002067 status = sre_search(&state, PatternObject_GetCode(self));
2068 } else {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002069 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002070 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002071
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002072 if (PyErr_Occurred())
2073 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00002074
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002075 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002076 if (status == 0)
2077 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +00002078 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002079 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002080 }
Tim Peters3d563502006-01-21 02:47:53 +00002081
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002082 /* don't bother to build a match object */
2083 switch (self->groups) {
2084 case 0:
2085 b = STATE_OFFSET(&state, state.start);
2086 e = STATE_OFFSET(&state, state.ptr);
2087 item = PySequence_GetSlice(string, b, e);
2088 if (!item)
2089 goto error;
2090 break;
2091 case 1:
2092 item = state_getslice(&state, 1, string, 1);
2093 if (!item)
2094 goto error;
2095 break;
2096 default:
2097 item = PyTuple_New(self->groups);
2098 if (!item)
2099 goto error;
2100 for (i = 0; i < self->groups; i++) {
2101 PyObject* o = state_getslice(&state, i+1, string, 1);
2102 if (!o) {
2103 Py_DECREF(item);
2104 goto error;
2105 }
2106 PyTuple_SET_ITEM(item, i, o);
2107 }
2108 break;
2109 }
2110
2111 status = PyList_Append(list, item);
2112 Py_DECREF(item);
2113 if (status < 0)
2114 goto error;
2115
2116 if (state.ptr == state.start)
2117 state.start = (void*) ((char*) state.ptr + state.charsize);
2118 else
2119 state.start = state.ptr;
2120
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002121 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002122
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002123 state_fini(&state);
2124 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +00002125
2126error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002127 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002128 state_fini(&state);
2129 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002130
Guido van Rossumb700df92000-03-31 14:59:30 +00002131}
2132
#if PY_VERSION_HEX >= 0x02020000
/* pattern.finditer(...): build a scanner from the given arguments and
   wrap its bound "search" method in a callable-iterator that stops when
   the method returns None. */
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
{
    PyObject* iterator = NULL;
    PyObject* search;
    PyObject* scanner = pattern_scanner(pattern, args, kw);

    if (scanner != NULL) {
        search = PyObject_GetAttrString(scanner, "search");
        Py_DECREF(scanner);

        if (search != NULL) {
            iterator = PyCallIter_New(search, Py_None);
            Py_DECREF(search);
        }
    }

    return iterator;
}
#endif
2156
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002157static PyObject*
2158pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2159{
2160 SRE_STATE state;
2161 PyObject* list;
2162 PyObject* item;
2163 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002164 Py_ssize_t n;
2165 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002166 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002167
2168 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002169 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002170 static char* kwlist[] = { "source", "maxsplit", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002171 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002172 &string, &maxsplit))
2173 return NULL;
2174
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002175 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002176 if (!string)
2177 return NULL;
2178
2179 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002180 if (!list) {
2181 state_fini(&state);
2182 return NULL;
2183 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002184
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002185 n = 0;
2186 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002187
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002188 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002189
2190 state_reset(&state);
2191
2192 state.ptr = state.start;
2193
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002194 if (state.logical_charsize == 1) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002195 status = sre_search(&state, PatternObject_GetCode(self));
2196 } else {
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002197 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002198 }
2199
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002200 if (PyErr_Occurred())
2201 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00002202
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002203 if (status <= 0) {
2204 if (status == 0)
2205 break;
2206 pattern_error(status);
2207 goto error;
2208 }
Tim Peters3d563502006-01-21 02:47:53 +00002209
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002210 if (state.start == state.ptr) {
2211 if (last == state.end)
2212 break;
2213 /* skip one character */
2214 state.start = (void*) ((char*) state.ptr + state.charsize);
2215 continue;
2216 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002217
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002218 /* get segment before this match */
2219 item = PySequence_GetSlice(
2220 string, STATE_OFFSET(&state, last),
2221 STATE_OFFSET(&state, state.start)
2222 );
2223 if (!item)
2224 goto error;
2225 status = PyList_Append(list, item);
2226 Py_DECREF(item);
2227 if (status < 0)
2228 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002229
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002230 /* add groups (if any) */
2231 for (i = 0; i < self->groups; i++) {
2232 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002233 if (!item)
2234 goto error;
2235 status = PyList_Append(list, item);
2236 Py_DECREF(item);
2237 if (status < 0)
2238 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002239 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002240
2241 n = n + 1;
2242
2243 last = state.start = state.ptr;
2244
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002245 }
2246
Fredrik Lundhf864aa82001-10-22 06:01:56 +00002247 /* get segment following last match (even if empty) */
2248 item = PySequence_GetSlice(
2249 string, STATE_OFFSET(&state, last), state.endpos
2250 );
2251 if (!item)
2252 goto error;
2253 status = PyList_Append(list, item);
2254 Py_DECREF(item);
2255 if (status < 0)
2256 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002257
2258 state_fini(&state);
2259 return list;
2260
2261error:
2262 Py_DECREF(list);
2263 state_fini(&state);
2264 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002265
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002266}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00002267
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002268static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002269pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002270 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002271{
2272 SRE_STATE state;
2273 PyObject* list;
2274 PyObject* item;
2275 PyObject* filter;
2276 PyObject* args;
2277 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002278 void* ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002279 int status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002280 Py_ssize_t n;
2281 Py_ssize_t i, b, e;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002282 int logical_charsize, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002283 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06002284 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002285
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002286 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002287 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002288 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002289 Py_INCREF(filter);
2290 filter_is_callable = 1;
2291 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002292 /* if not callable, check if it's a literal string */
2293 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06002294 view.buf = NULL;
Benjamin Peterson33d21a22012-03-07 14:59:13 -06002295 ptr = getstring(ptemplate, &n, &logical_charsize, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002296 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002297 if (ptr) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002298 literal = sre_literal_template(b, ptr, n);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002299 } else {
2300 PyErr_Clear();
2301 literal = 0;
2302 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06002303 if (view.buf)
2304 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002305 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002306 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002307 Py_INCREF(filter);
2308 filter_is_callable = 0;
2309 } else {
2310 /* not a literal; hand it over to the template compiler */
2311 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002312 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002313 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002314 );
2315 if (!filter)
2316 return NULL;
2317 filter_is_callable = PyCallable_Check(filter);
2318 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00002319 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002320
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002321 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002322 if (!string) {
2323 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002324 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00002325 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002326
2327 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002328 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00002329 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00002330 state_fini(&state);
2331 return NULL;
2332 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002333
2334 n = i = 0;
2335
2336 while (!count || n < count) {
2337
2338 state_reset(&state);
2339
2340 state.ptr = state.start;
2341
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002342 if (state.logical_charsize == 1) {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002343 status = sre_search(&state, PatternObject_GetCode(self));
2344 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002345 status = sre_usearch(&state, PatternObject_GetCode(self));
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002346 }
2347
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002348 if (PyErr_Occurred())
2349 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00002350
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002351 if (status <= 0) {
2352 if (status == 0)
2353 break;
2354 pattern_error(status);
2355 goto error;
2356 }
Tim Peters3d563502006-01-21 02:47:53 +00002357
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002358 b = STATE_OFFSET(&state, state.start);
2359 e = STATE_OFFSET(&state, state.ptr);
2360
2361 if (i < b) {
2362 /* get segment before this match */
2363 item = PySequence_GetSlice(string, i, b);
2364 if (!item)
2365 goto error;
2366 status = PyList_Append(list, item);
2367 Py_DECREF(item);
2368 if (status < 0)
2369 goto error;
2370
2371 } else if (i == b && i == e && n > 0)
2372 /* ignore empty match on latest position */
2373 goto next;
2374
2375 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00002376 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002377 match = pattern_new_match(self, &state, 1);
2378 if (!match)
2379 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00002380 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002381 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00002382 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002383 goto error;
2384 }
2385 item = PyObject_CallObject(filter, args);
2386 Py_DECREF(args);
2387 Py_DECREF(match);
2388 if (!item)
2389 goto error;
2390 } else {
2391 /* filter is literal string */
2392 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00002393 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002394 }
2395
2396 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00002397 if (item != Py_None) {
2398 status = PyList_Append(list, item);
2399 Py_DECREF(item);
2400 if (status < 0)
2401 goto error;
2402 }
Tim Peters3d563502006-01-21 02:47:53 +00002403
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002404 i = e;
2405 n = n + 1;
2406
2407next:
2408 /* move on */
2409 if (state.ptr == state.start)
2410 state.start = (void*) ((char*) state.ptr + state.charsize);
2411 else
2412 state.start = state.ptr;
2413
2414 }
2415
2416 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00002417 if (i < state.endpos) {
2418 item = PySequence_GetSlice(string, i, state.endpos);
2419 if (!item)
2420 goto error;
2421 status = PyList_Append(list, item);
2422 Py_DECREF(item);
2423 if (status < 0)
2424 goto error;
2425 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002426
2427 state_fini(&state);
2428
Guido van Rossum4e173842001-12-07 04:25:10 +00002429 Py_DECREF(filter);
2430
Fredrik Lundhdac58492001-10-21 21:48:30 +00002431 /* convert list to single string (also removes list) */
Thomas Wouters1b7f8912007-09-19 03:06:30 +00002432 item = join_list(list, string);
Fredrik Lundhdac58492001-10-21 21:48:30 +00002433
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002434 if (!item)
2435 return NULL;
2436
2437 if (subn)
2438 return Py_BuildValue("Ni", item, n);
2439
2440 return item;
2441
2442error:
2443 Py_DECREF(list);
2444 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00002445 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002446 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00002447
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002448}
2449
2450static PyObject*
2451pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2452{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002453 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002454 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002455 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002456 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002457 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002458 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002459 return NULL;
2460
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002461 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002462}
2463
2464static PyObject*
2465pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2466{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002467 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002468 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002469 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002470 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002471 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002472 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002473 return NULL;
2474
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002475 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002476}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00002477
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002478static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002479pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002480{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002481#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002482 PatternObject* copy;
2483 int offset;
2484
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002485 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2486 if (!copy)
2487 return NULL;
2488
2489 offset = offsetof(PatternObject, groups);
2490
2491 Py_XINCREF(self->groupindex);
2492 Py_XINCREF(self->indexgroup);
2493 Py_XINCREF(self->pattern);
2494
2495 memcpy((char*) copy + offset, (char*) self + offset,
2496 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00002497 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002498
2499 return (PyObject*) copy;
2500#else
2501 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2502 return NULL;
2503#endif
2504}
2505
2506static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002507pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002508{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002509#ifdef USE_BUILTIN_COPY
2510 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002511
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002512 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002513 if (!copy)
2514 return NULL;
2515
2516 if (!deepcopy(&copy->groupindex, memo) ||
2517 !deepcopy(&copy->indexgroup, memo) ||
2518 !deepcopy(&copy->pattern, memo)) {
2519 Py_DECREF(copy);
2520 return NULL;
2521 }
2522
2523#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002524 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2525 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002526#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002527}
2528
/* Docstrings for the pattern methods; the *_doc names for match, search,
   sub, subn, split, findall and finditer are attached to their methods
   in the pattern_methods table.  NOTE(review): pattern_doc is presumably
   the docstring of the type itself -- its use is not visible here. */
Raymond Hettinger94478742004-09-24 04:31:19 +00002529PyDoc_STRVAR(pattern_match_doc,
2530"match(string[, pos[, endpos]]) --> match object or None.\n\
2531 Matches zero or more characters at the beginning of the string");
2532
2533PyDoc_STRVAR(pattern_search_doc,
2534"search(string[, pos[, endpos]]) --> match object or None.\n\
2535 Scan through string looking for a match, and return a corresponding\n\
2536 MatchObject instance. Return None if no position in the string matches.");
2537
2538PyDoc_STRVAR(pattern_split_doc,
2539"split(string[, maxsplit = 0]) --> list.\n\
2540 Split string by the occurrences of pattern.");
2541
2542PyDoc_STRVAR(pattern_findall_doc,
2543"findall(string[, pos[, endpos]]) --> list.\n\
2544 Return a list of all non-overlapping matches of pattern in string.");
2545
2546PyDoc_STRVAR(pattern_finditer_doc,
2547"finditer(string[, pos[, endpos]]) --> iterator.\n\
2548 Return an iterator over all non-overlapping matches for the \n\
2549 RE pattern in string. For each match, the iterator returns a\n\
2550 match object.");
2551
2552PyDoc_STRVAR(pattern_sub_doc,
2553"sub(repl, string[, count = 0]) --> newstring\n\
2554 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00002555 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002556
2557PyDoc_STRVAR(pattern_subn_doc,
2558"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2559 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2560 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00002561 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00002562
2563PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2564
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002565static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00002566 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002567 pattern_match_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002568 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002569 pattern_search_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00002570 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002571 pattern_sub_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00002572 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002573 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002574 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002575 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00002576 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002577 pattern_findall_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002578#if PY_VERSION_HEX >= 0x02020000
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002579 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002580 pattern_finditer_doc},
Fredrik Lundh703ce812001-10-24 22:16:30 +00002581#endif
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002582 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002583 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2584 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002585 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002586};
2587
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002588#define PAT_OFF(x) offsetof(PatternObject, x)
2589static PyMemberDef pattern_members[] = {
2590 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2591 {"flags", T_INT, PAT_OFF(flags), READONLY},
2592 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2593 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
2594 {NULL} /* Sentinel */
2595};
Guido van Rossumb700df92000-03-31 14:59:30 +00002596
Neal Norwitz57c179c2006-03-22 07:18:02 +00002597static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002598 PyVarObject_HEAD_INIT(NULL, 0)
2599 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00002600 sizeof(PatternObject), sizeof(SRE_CODE),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002601 (destructor)pattern_dealloc, /* tp_dealloc */
2602 0, /* tp_print */
2603 0, /* tp_getattr */
2604 0, /* tp_setattr */
2605 0, /* tp_reserved */
2606 0, /* tp_repr */
2607 0, /* tp_as_number */
2608 0, /* tp_as_sequence */
2609 0, /* tp_as_mapping */
2610 0, /* tp_hash */
2611 0, /* tp_call */
2612 0, /* tp_str */
2613 0, /* tp_getattro */
2614 0, /* tp_setattro */
2615 0, /* tp_as_buffer */
2616 Py_TPFLAGS_DEFAULT, /* tp_flags */
2617 pattern_doc, /* tp_doc */
2618 0, /* tp_traverse */
2619 0, /* tp_clear */
2620 0, /* tp_richcompare */
2621 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2622 0, /* tp_iter */
2623 0, /* tp_iternext */
2624 pattern_methods, /* tp_methods */
2625 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00002626};
2627
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002628static int _validate(PatternObject *self); /* Forward */
2629
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002630static PyObject *
2631_compile(PyObject* self_, PyObject* args)
2632{
2633 /* "compile" pattern descriptor to pattern object */
2634
2635 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002636 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002637
2638 PyObject* pattern;
2639 int flags = 0;
2640 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002641 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002642 PyObject* groupindex = NULL;
2643 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06002644
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002645 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002646 &PyList_Type, &code, &groups,
2647 &groupindex, &indexgroup))
2648 return NULL;
2649
2650 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00002651 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002652 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2653 if (!self)
2654 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002655 self->weakreflist = NULL;
2656 self->pattern = NULL;
2657 self->groupindex = NULL;
2658 self->indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06002659 self->view.buf = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002660
2661 self->codesize = n;
2662
2663 for (i = 0; i < n; i++) {
2664 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00002665 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002666 self->code[i] = (SRE_CODE) value;
2667 if ((unsigned long) self->code[i] != value) {
2668 PyErr_SetString(PyExc_OverflowError,
2669 "regular expression code size limit exceeded");
2670 break;
2671 }
2672 }
2673
2674 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002675 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002676 return NULL;
2677 }
2678
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002679 if (pattern == Py_None) {
2680 self->logical_charsize = -1;
2681 self->charsize = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01002682 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002683 else {
2684 Py_ssize_t p_length;
2685 if (!getstring(pattern, &p_length, &self->logical_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -06002686 &self->charsize, &self->view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002687 Py_DECREF(self);
2688 return NULL;
2689 }
2690 }
Antoine Pitroufd036452008-08-19 17:56:33 +00002691
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002692 Py_INCREF(pattern);
2693 self->pattern = pattern;
2694
2695 self->flags = flags;
2696
2697 self->groups = groups;
2698
2699 Py_XINCREF(groupindex);
2700 self->groupindex = groupindex;
2701
2702 Py_XINCREF(indexgroup);
2703 self->indexgroup = indexgroup;
2704
2705 self->weakreflist = NULL;
2706
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002707 if (!_validate(self)) {
2708 Py_DECREF(self);
2709 return NULL;
2710 }
2711
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002712 return (PyObject*) self;
2713}
2714
Guido van Rossumb700df92000-03-31 14:59:30 +00002715/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002716/* Code validation */
2717
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4
   bytes wide (the latter if Python is compiled for "wide" unicode support).
*/
2740
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on the enclosing function's `code` and `end`
   pointers and store into its `op`, `arg` or `skip` variable.  Each one
   FAILs when no word is left to read. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and check that code+skip-adj lies in [code, end].
   The check is done on integers rather than by forming code+skip-adj,
   because a pointer outside the array is undefined behaviour and such a
   test may be optimised away.  If skip < adj the unsigned subtraction
   wraps to a huge value, which also FAILs. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if ((size_t)skip - (size_t)(adj) >              \
            (size_t)(end - code))                       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002781
2782static int
2783_validate_charset(SRE_CODE *code, SRE_CODE *end)
2784{
2785 /* Some variables are manipulated by the macros above */
2786 SRE_CODE op;
2787 SRE_CODE arg;
2788 SRE_CODE offset;
2789 int i;
2790
2791 while (code < end) {
2792 GET_OP;
2793 switch (op) {
2794
2795 case SRE_OP_NEGATE:
2796 break;
2797
2798 case SRE_OP_LITERAL:
2799 GET_ARG;
2800 break;
2801
2802 case SRE_OP_RANGE:
2803 GET_ARG;
2804 GET_ARG;
2805 break;
2806
2807 case SRE_OP_CHARSET:
2808 offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
2809 if (code+offset < code || code+offset > end)
2810 FAIL;
2811 code += offset;
2812 break;
2813
2814 case SRE_OP_BIGCHARSET:
2815 GET_ARG; /* Number of blocks */
2816 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
2817 if (code+offset < code || code+offset > end)
2818 FAIL;
2819 /* Make sure that each byte points to a valid block */
2820 for (i = 0; i < 256; i++) {
2821 if (((unsigned char *)code)[i] >= arg)
2822 FAIL;
2823 }
2824 code += offset;
2825 offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
2826 if (code+offset < code || code+offset > end)
2827 FAIL;
2828 code += offset;
2829 break;
2830
2831 case SRE_OP_CATEGORY:
2832 GET_ARG;
2833 switch (arg) {
2834 case SRE_CATEGORY_DIGIT:
2835 case SRE_CATEGORY_NOT_DIGIT:
2836 case SRE_CATEGORY_SPACE:
2837 case SRE_CATEGORY_NOT_SPACE:
2838 case SRE_CATEGORY_WORD:
2839 case SRE_CATEGORY_NOT_WORD:
2840 case SRE_CATEGORY_LINEBREAK:
2841 case SRE_CATEGORY_NOT_LINEBREAK:
2842 case SRE_CATEGORY_LOC_WORD:
2843 case SRE_CATEGORY_LOC_NOT_WORD:
2844 case SRE_CATEGORY_UNI_DIGIT:
2845 case SRE_CATEGORY_UNI_NOT_DIGIT:
2846 case SRE_CATEGORY_UNI_SPACE:
2847 case SRE_CATEGORY_UNI_NOT_SPACE:
2848 case SRE_CATEGORY_UNI_WORD:
2849 case SRE_CATEGORY_UNI_NOT_WORD:
2850 case SRE_CATEGORY_UNI_LINEBREAK:
2851 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
2852 break;
2853 default:
2854 FAIL;
2855 }
2856 break;
2857
2858 default:
2859 FAIL;
2860
2861 }
2862 }
2863
2864 return 1;
2865}
2866
2867static int
2868_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2869{
2870 /* Some variables are manipulated by the macros above */
2871 SRE_CODE op;
2872 SRE_CODE arg;
2873 SRE_CODE skip;
2874
2875 VTRACE(("code=%p, end=%p\n", code, end));
2876
2877 if (code > end)
2878 FAIL;
2879
2880 while (code < end) {
2881 GET_OP;
2882 switch (op) {
2883
2884 case SRE_OP_MARK:
2885 /* We don't check whether marks are properly nested; the
2886 sre_match() code is robust even if they don't, and the worst
2887 you can get is nonsensical match results. */
2888 GET_ARG;
2889 if (arg > 2*groups+1) {
2890 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
2891 FAIL;
2892 }
2893 break;
2894
2895 case SRE_OP_LITERAL:
2896 case SRE_OP_NOT_LITERAL:
2897 case SRE_OP_LITERAL_IGNORE:
2898 case SRE_OP_NOT_LITERAL_IGNORE:
2899 GET_ARG;
2900 /* The arg is just a character, nothing to check */
2901 break;
2902
2903 case SRE_OP_SUCCESS:
2904 case SRE_OP_FAILURE:
2905 /* Nothing to check; these normally end the matching process */
2906 break;
2907
2908 case SRE_OP_AT:
2909 GET_ARG;
2910 switch (arg) {
2911 case SRE_AT_BEGINNING:
2912 case SRE_AT_BEGINNING_STRING:
2913 case SRE_AT_BEGINNING_LINE:
2914 case SRE_AT_END:
2915 case SRE_AT_END_LINE:
2916 case SRE_AT_END_STRING:
2917 case SRE_AT_BOUNDARY:
2918 case SRE_AT_NON_BOUNDARY:
2919 case SRE_AT_LOC_BOUNDARY:
2920 case SRE_AT_LOC_NON_BOUNDARY:
2921 case SRE_AT_UNI_BOUNDARY:
2922 case SRE_AT_UNI_NON_BOUNDARY:
2923 break;
2924 default:
2925 FAIL;
2926 }
2927 break;
2928
2929 case SRE_OP_ANY:
2930 case SRE_OP_ANY_ALL:
2931 /* These have no operands */
2932 break;
2933
2934 case SRE_OP_IN:
2935 case SRE_OP_IN_IGNORE:
2936 GET_SKIP;
2937 /* Stop 1 before the end; we check the FAILURE below */
2938 if (!_validate_charset(code, code+skip-2))
2939 FAIL;
2940 if (code[skip-2] != SRE_OP_FAILURE)
2941 FAIL;
2942 code += skip-1;
2943 break;
2944
2945 case SRE_OP_INFO:
2946 {
2947 /* A minimal info field is
2948 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
2949 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
2950 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02002951 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002952 SRE_CODE *newcode;
2953 GET_SKIP;
2954 newcode = code+skip-1;
2955 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02002956 GET_ARG;
2957 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002958 /* Check that only valid flags are present */
2959 if ((flags & ~(SRE_INFO_PREFIX |
2960 SRE_INFO_LITERAL |
2961 SRE_INFO_CHARSET)) != 0)
2962 FAIL;
2963 /* PREFIX and CHARSET are mutually exclusive */
2964 if ((flags & SRE_INFO_PREFIX) &&
2965 (flags & SRE_INFO_CHARSET))
2966 FAIL;
2967 /* LITERAL implies PREFIX */
2968 if ((flags & SRE_INFO_LITERAL) &&
2969 !(flags & SRE_INFO_PREFIX))
2970 FAIL;
2971 /* Validate the prefix */
2972 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02002973 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002974 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02002975 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00002976 /* Here comes the prefix string */
2977 if (code+prefix_len < code || code+prefix_len > newcode)
2978 FAIL;
2979 code += prefix_len;
2980 /* And here comes the overlap table */
2981 if (code+prefix_len < code || code+prefix_len > newcode)
2982 FAIL;
2983 /* Each overlap value should be < prefix_len */
2984 for (i = 0; i < prefix_len; i++) {
2985 if (code[i] >= prefix_len)
2986 FAIL;
2987 }
2988 code += prefix_len;
2989 }
2990 /* Validate the charset */
2991 if (flags & SRE_INFO_CHARSET) {
2992 if (!_validate_charset(code, newcode-1))
2993 FAIL;
2994 if (newcode[-1] != SRE_OP_FAILURE)
2995 FAIL;
2996 code = newcode;
2997 }
2998 else if (code != newcode) {
2999 VTRACE(("code=%p, newcode=%p\n", code, newcode));
3000 FAIL;
3001 }
3002 }
3003 break;
3004
3005 case SRE_OP_BRANCH:
3006 {
3007 SRE_CODE *target = NULL;
3008 for (;;) {
3009 GET_SKIP;
3010 if (skip == 0)
3011 break;
3012 /* Stop 2 before the end; we check the JUMP below */
3013 if (!_validate_inner(code, code+skip-3, groups))
3014 FAIL;
3015 code += skip-3;
3016 /* Check that it ends with a JUMP, and that each JUMP
3017 has the same target */
3018 GET_OP;
3019 if (op != SRE_OP_JUMP)
3020 FAIL;
3021 GET_SKIP;
3022 if (target == NULL)
3023 target = code+skip-1;
3024 else if (code+skip-1 != target)
3025 FAIL;
3026 }
3027 }
3028 break;
3029
3030 case SRE_OP_REPEAT_ONE:
3031 case SRE_OP_MIN_REPEAT_ONE:
3032 {
3033 SRE_CODE min, max;
3034 GET_SKIP;
3035 GET_ARG; min = arg;
3036 GET_ARG; max = arg;
3037 if (min > max)
3038 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003039 if (max > 65535)
3040 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003041 if (!_validate_inner(code, code+skip-4, groups))
3042 FAIL;
3043 code += skip-4;
3044 GET_OP;
3045 if (op != SRE_OP_SUCCESS)
3046 FAIL;
3047 }
3048 break;
3049
3050 case SRE_OP_REPEAT:
3051 {
3052 SRE_CODE min, max;
3053 GET_SKIP;
3054 GET_ARG; min = arg;
3055 GET_ARG; max = arg;
3056 if (min > max)
3057 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003058 if (max > 65535)
3059 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003060 if (!_validate_inner(code, code+skip-3, groups))
3061 FAIL;
3062 code += skip-3;
3063 GET_OP;
3064 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
3065 FAIL;
3066 }
3067 break;
3068
3069 case SRE_OP_GROUPREF:
3070 case SRE_OP_GROUPREF_IGNORE:
3071 GET_ARG;
3072 if (arg >= groups)
3073 FAIL;
3074 break;
3075
3076 case SRE_OP_GROUPREF_EXISTS:
3077 /* The regex syntax for this is: '(?(group)then|else)', where
3078 'group' is either an integer group number or a group name,
3079 'then' and 'else' are sub-regexes, and 'else' is optional. */
3080 GET_ARG;
3081 if (arg >= groups)
3082 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00003083 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00003084 code--; /* The skip is relative to the first arg! */
3085 /* There are two possibilities here: if there is both a 'then'
3086 part and an 'else' part, the generated code looks like:
3087
3088 GROUPREF_EXISTS
3089 <group>
3090 <skipyes>
3091 ...then part...
3092 JUMP
3093 <skipno>
3094 (<skipyes> jumps here)
3095 ...else part...
3096 (<skipno> jumps here)
3097
3098 If there is only a 'then' part, it looks like:
3099
3100 GROUPREF_EXISTS
3101 <group>
3102 <skip>
3103 ...then part...
3104 (<skip> jumps here)
3105
3106 There is no direct way to decide which it is, and we don't want
3107 to allow arbitrary jumps anywhere in the code; so we just look
3108 for a JUMP opcode preceding our skip target.
3109 */
3110 if (skip >= 3 && code+skip-3 >= code &&
3111 code[skip-3] == SRE_OP_JUMP)
3112 {
3113 VTRACE(("both then and else parts present\n"));
3114 if (!_validate_inner(code+1, code+skip-3, groups))
3115 FAIL;
3116 code += skip-2; /* Position after JUMP, at <skipno> */
3117 GET_SKIP;
3118 if (!_validate_inner(code, code+skip-1, groups))
3119 FAIL;
3120 code += skip-1;
3121 }
3122 else {
3123 VTRACE(("only a then part present\n"));
3124 if (!_validate_inner(code+1, code+skip-1, groups))
3125 FAIL;
3126 code += skip-1;
3127 }
3128 break;
3129
3130 case SRE_OP_ASSERT:
3131 case SRE_OP_ASSERT_NOT:
3132 GET_SKIP;
3133 GET_ARG; /* 0 for lookahead, width for lookbehind */
3134 code--; /* Back up over arg to simplify math below */
3135 if (arg & 0x80000000)
3136 FAIL; /* Width too large */
3137 /* Stop 1 before the end; we check the SUCCESS below */
3138 if (!_validate_inner(code+1, code+skip-2, groups))
3139 FAIL;
3140 code += skip-2;
3141 GET_OP;
3142 if (op != SRE_OP_SUCCESS)
3143 FAIL;
3144 break;
3145
3146 default:
3147 FAIL;
3148
3149 }
3150 }
3151
3152 VTRACE(("okay\n"));
3153 return 1;
3154}
3155
3156static int
3157_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
3158{
3159 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
3160 FAIL;
3161 if (groups == 0) /* fix for simplejson */
3162 groups = 100; /* 100 groups should always be safe */
3163 return _validate_inner(code, end-1, groups);
3164}
3165
3166static int
3167_validate(PatternObject *self)
3168{
3169 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
3170 {
3171 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
3172 return 0;
3173 }
3174 else
3175 VTRACE(("Success!\n"));
3176 return 1;
3177}
3178
3179/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00003180/* match methods */
3181
3182static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003183match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00003184{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003185 Py_XDECREF(self->regs);
3186 Py_XDECREF(self->string);
3187 Py_DECREF(self->pattern);
3188 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00003189}
3190
3191static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003192match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00003193{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003194 if (index < 0 || index >= self->groups) {
3195 /* raise IndexError if we were given a bad group number */
3196 PyErr_SetString(
3197 PyExc_IndexError,
3198 "no such group"
3199 );
3200 return NULL;
3201 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003202
Fredrik Lundh6f013982000-07-03 18:44:21 +00003203 index *= 2;
3204
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003205 if (self->string == Py_None || self->mark[index] < 0) {
3206 /* return default value if the string or group is undefined */
3207 Py_INCREF(def);
3208 return def;
3209 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003210
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003211 return PySequence_GetSlice(
3212 self->string, self->mark[index], self->mark[index+1]
3213 );
Guido van Rossumb700df92000-03-31 14:59:30 +00003214}
3215
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003216static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003217match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00003218{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003219 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00003220
Guido van Rossumddefaf32007-01-14 03:31:43 +00003221 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03003222 /* Default value */
3223 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00003224
Christian Heimes217cfd12007-12-02 14:31:20 +00003225 if (PyLong_Check(index))
3226 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00003227
Fredrik Lundh6f013982000-07-03 18:44:21 +00003228 i = -1;
3229
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003230 if (self->pattern->groupindex) {
3231 index = PyObject_GetItem(self->pattern->groupindex, index);
3232 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00003233 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00003234 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00003235 Py_DECREF(index);
3236 } else
3237 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003238 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00003239
3240 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003241}
3242
3243static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00003244match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003245{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003246 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00003247}
3248
3249static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003250match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003251{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003252 /* delegate to Python code */
3253 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00003254 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003255 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00003256 );
3257}
3258
3259static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003260match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003261{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003262 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003263 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00003264
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003265 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00003266
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003267 switch (size) {
3268 case 0:
3269 result = match_getslice(self, Py_False, Py_None);
3270 break;
3271 case 1:
3272 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
3273 break;
3274 default:
3275 /* fetch multiple items */
3276 result = PyTuple_New(size);
3277 if (!result)
3278 return NULL;
3279 for (i = 0; i < size; i++) {
3280 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00003281 self, PyTuple_GET_ITEM(args, i), Py_None
3282 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003283 if (!item) {
3284 Py_DECREF(result);
3285 return NULL;
3286 }
3287 PyTuple_SET_ITEM(result, i, item);
3288 }
3289 break;
3290 }
3291 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003292}
3293
3294static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00003295match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00003296{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003297 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003298 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00003299
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003300 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003301 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00003302 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003303 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003304
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003305 result = PyTuple_New(self->groups-1);
3306 if (!result)
3307 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003308
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003309 for (index = 1; index < self->groups; index++) {
3310 PyObject* item;
3311 item = match_getslice_by_index(self, index, def);
3312 if (!item) {
3313 Py_DECREF(result);
3314 return NULL;
3315 }
3316 PyTuple_SET_ITEM(result, index-1, item);
3317 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003318
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003319 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003320}
3321
3322static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00003323match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00003324{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003325 PyObject* result;
3326 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003327 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00003328
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003329 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003330 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00003331 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003332 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003333
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003334 result = PyDict_New();
3335 if (!result || !self->pattern->groupindex)
3336 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00003337
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003338 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003339 if (!keys)
3340 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00003341
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003342 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00003343 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003344 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00003345 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003346 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003347 if (!key)
3348 goto failed;
3349 value = match_getslice(self, key, def);
3350 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003351 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003352 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003353 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00003354 status = PyDict_SetItem(result, key, value);
3355 Py_DECREF(value);
3356 if (status < 0)
3357 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003358 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003359
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003360 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00003361
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003362 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00003363
3364failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00003365 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00003366 Py_DECREF(result);
3367 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003368}
3369
3370static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003371match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003372{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003373 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003374
Guido van Rossumddefaf32007-01-14 03:31:43 +00003375 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003376 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003377 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003378
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003379 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003380
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003381 if (index < 0 || index >= self->groups) {
3382 PyErr_SetString(
3383 PyExc_IndexError,
3384 "no such group"
3385 );
3386 return NULL;
3387 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003388
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003389 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003390 return Py_BuildValue("i", self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00003391}
3392
3393static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003394match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003395{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003396 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003397
Guido van Rossumddefaf32007-01-14 03:31:43 +00003398 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003399 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003400 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003401
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003402 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003403
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003404 if (index < 0 || index >= self->groups) {
3405 PyErr_SetString(
3406 PyExc_IndexError,
3407 "no such group"
3408 );
3409 return NULL;
3410 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003411
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003412 /* mark is -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003413 return Py_BuildValue("i", self->mark[index*2+1]);
3414}
3415
3416LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003417_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003418{
3419 PyObject* pair;
3420 PyObject* item;
3421
3422 pair = PyTuple_New(2);
3423 if (!pair)
3424 return NULL;
3425
Christian Heimes217cfd12007-12-02 14:31:20 +00003426 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003427 if (!item)
3428 goto error;
3429 PyTuple_SET_ITEM(pair, 0, item);
3430
Christian Heimes217cfd12007-12-02 14:31:20 +00003431 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003432 if (!item)
3433 goto error;
3434 PyTuple_SET_ITEM(pair, 1, item);
3435
3436 return pair;
3437
3438 error:
3439 Py_DECREF(pair);
3440 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003441}
3442
3443static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003444match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00003445{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003446 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003447
Guido van Rossumddefaf32007-01-14 03:31:43 +00003448 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003449 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003450 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003451
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003452 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003453
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003454 if (index < 0 || index >= self->groups) {
3455 PyErr_SetString(
3456 PyExc_IndexError,
3457 "no such group"
3458 );
3459 return NULL;
3460 }
Guido van Rossumb700df92000-03-31 14:59:30 +00003461
Fredrik Lundh510c97b2000-09-02 16:36:57 +00003462 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003463 return _pair(self->mark[index*2], self->mark[index*2+1]);
3464}
3465
3466static PyObject*
3467match_regs(MatchObject* self)
3468{
3469 PyObject* regs;
3470 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003471 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003472
3473 regs = PyTuple_New(self->groups);
3474 if (!regs)
3475 return NULL;
3476
3477 for (index = 0; index < self->groups; index++) {
3478 item = _pair(self->mark[index*2], self->mark[index*2+1]);
3479 if (!item) {
3480 Py_DECREF(regs);
3481 return NULL;
3482 }
3483 PyTuple_SET_ITEM(regs, index, item);
3484 }
3485
3486 Py_INCREF(regs);
3487 self->regs = regs;
3488
3489 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00003490}
3491
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003492static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003493match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003494{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003495#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003496 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003497 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00003498
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003499 slots = 2 * (self->pattern->groups+1);
3500
3501 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3502 if (!copy)
3503 return NULL;
3504
3505 /* this value a constant, but any compiler should be able to
3506 figure that out all by itself */
3507 offset = offsetof(MatchObject, string);
3508
3509 Py_XINCREF(self->pattern);
3510 Py_XINCREF(self->string);
3511 Py_XINCREF(self->regs);
3512
3513 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003514 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003515
3516 return (PyObject*) copy;
3517#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003518 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003519 return NULL;
3520#endif
3521}
3522
3523static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003524match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003525{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003526#ifdef USE_BUILTIN_COPY
3527 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00003528
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003529 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003530 if (!copy)
3531 return NULL;
3532
3533 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3534 !deepcopy(&copy->string, memo) ||
3535 !deepcopy(&copy->regs, memo)) {
3536 Py_DECREF(copy);
3537 return NULL;
3538 }
3539
3540#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003541 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3542 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00003543#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00003544}
3545
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003546static PyMethodDef match_methods[] = {
Fredrik Lundh562586e2000-10-03 20:43:34 +00003547 {"group", (PyCFunction) match_group, METH_VARARGS},
3548 {"start", (PyCFunction) match_start, METH_VARARGS},
3549 {"end", (PyCFunction) match_end, METH_VARARGS},
3550 {"span", (PyCFunction) match_span, METH_VARARGS},
3551 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
3552 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003553 {"expand", (PyCFunction) match_expand, METH_O},
3554 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3555 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003556 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003557};
3558
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003559static PyObject *
3560match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00003561{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003562 if (self->lastindex >= 0)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03003563 return Py_BuildValue("i", self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003564 Py_INCREF(Py_None);
3565 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00003566}
3567
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003568static PyObject *
3569match_lastgroup_get(MatchObject *self)
3570{
3571 if (self->pattern->indexgroup && self->lastindex >= 0) {
3572 PyObject* result = PySequence_GetItem(
3573 self->pattern->indexgroup, self->lastindex
3574 );
3575 if (result)
3576 return result;
3577 PyErr_Clear();
3578 }
3579 Py_INCREF(Py_None);
3580 return Py_None;
3581}
3582
3583static PyObject *
3584match_regs_get(MatchObject *self)
3585{
3586 if (self->regs) {
3587 Py_INCREF(self->regs);
3588 return self->regs;
3589 } else
3590 return match_regs(self);
3591}
3592
3593static PyGetSetDef match_getset[] = {
3594 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
3595 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
3596 {"regs", (getter)match_regs_get, (setter)NULL},
3597 {NULL}
3598};
3599
3600#define MATCH_OFF(x) offsetof(MatchObject, x)
3601static PyMemberDef match_members[] = {
3602 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
3603 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
3604 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
3605 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
3606 {NULL}
3607};
3608
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
3611
Neal Norwitz57c179c2006-03-22 07:18:02 +00003612static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003613 PyVarObject_HEAD_INIT(NULL,0)
3614 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003615 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03003616 (destructor)match_dealloc, /* tp_dealloc */
3617 0, /* tp_print */
3618 0, /* tp_getattr */
3619 0, /* tp_setattr */
3620 0, /* tp_reserved */
3621 0, /* tp_repr */
3622 0, /* tp_as_number */
3623 0, /* tp_as_sequence */
3624 0, /* tp_as_mapping */
3625 0, /* tp_hash */
3626 0, /* tp_call */
3627 0, /* tp_str */
3628 0, /* tp_getattro */
3629 0, /* tp_setattro */
3630 0, /* tp_as_buffer */
3631 Py_TPFLAGS_DEFAULT, /* tp_flags */
3632 0, /* tp_doc */
3633 0, /* tp_traverse */
3634 0, /* tp_clear */
3635 0, /* tp_richcompare */
3636 0, /* tp_weaklistoffset */
3637 0, /* tp_iter */
3638 0, /* tp_iternext */
3639 match_methods, /* tp_methods */
3640 match_members, /* tp_members */
3641 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00003642};
3643
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003644static PyObject*
3645pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3646{
3647 /* create match object (from state object) */
3648
3649 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003650 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003651 char* base;
3652 int n;
3653
3654 if (status > 0) {
3655
3656 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00003657 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003658 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3659 2*(pattern->groups+1));
3660 if (!match)
3661 return NULL;
3662
3663 Py_INCREF(pattern);
3664 match->pattern = pattern;
3665
3666 Py_INCREF(state->string);
3667 match->string = state->string;
3668
3669 match->regs = NULL;
3670 match->groups = pattern->groups+1;
3671
3672 /* fill in group slices */
3673
3674 base = (char*) state->beginning;
3675 n = state->charsize;
3676
3677 match->mark[0] = ((char*) state->start - base) / n;
3678 match->mark[1] = ((char*) state->ptr - base) / n;
3679
3680 for (i = j = 0; i < pattern->groups; i++, j+=2)
3681 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3682 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3683 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3684 } else
3685 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3686
3687 match->pos = state->pos;
3688 match->endpos = state->endpos;
3689
3690 match->lastindex = state->lastindex;
3691
3692 return (PyObject*) match;
3693
3694 } else if (status == 0) {
3695
3696 /* no match */
3697 Py_INCREF(Py_None);
3698 return Py_None;
3699
3700 }
3701
3702 /* internal error */
3703 pattern_error(status);
3704 return NULL;
3705}
3706
3707
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003708/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003709/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003710
3711static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003712scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003713{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003714 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00003715 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003716 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003717}
3718
3719static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003720scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003721{
3722 SRE_STATE* state = &self->state;
3723 PyObject* match;
3724 int status;
3725
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003726 state_reset(state);
3727
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003728 state->ptr = state->start;
3729
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003730 if (state->logical_charsize == 1) {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003731 status = sre_match(state, PatternObject_GetCode(self->pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003732 } else {
Gustavo Niemeyer2cbdc2a2003-12-13 20:32:08 +00003733 status = sre_umatch(state, PatternObject_GetCode(self->pattern));
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003734 }
Thomas Wouters89f507f2006-12-13 04:49:30 +00003735 if (PyErr_Occurred())
3736 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003737
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003738 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003739 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003740
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003741 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003742 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003743 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003744 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003745
3746 return match;
3747}
3748
3749
3750static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003751scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003752{
3753 SRE_STATE* state = &self->state;
3754 PyObject* match;
3755 int status;
3756
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00003757 state_reset(state);
3758
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003759 state->ptr = state->start;
3760
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003761 if (state->logical_charsize == 1) {
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003762 status = sre_search(state, PatternObject_GetCode(self->pattern));
3763 } else {
3764 status = sre_usearch(state, PatternObject_GetCode(self->pattern));
3765 }
Thomas Wouters89f507f2006-12-13 04:49:30 +00003766 if (PyErr_Occurred())
3767 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003768
Fredrik Lundh75f2d672000-06-29 11:34:28 +00003769 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003770 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003771
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00003772 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003773 state->start = (void*) ((char*) state->ptr + state->charsize);
3774 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003775 state->start = state->ptr;
3776
3777 return match;
3778}
3779
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00003780static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003781 {"match", (PyCFunction) scanner_match, METH_NOARGS},
3782 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003783 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003784};
3785
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003786#define SCAN_OFF(x) offsetof(ScannerObject, x)
3787static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03003788 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003789 {NULL} /* Sentinel */
3790};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003791
Neal Norwitz57c179c2006-03-22 07:18:02 +00003792static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003793 PyVarObject_HEAD_INIT(NULL, 0)
3794 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003795 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00003796 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03003797 0, /* tp_print */
3798 0, /* tp_getattr */
3799 0, /* tp_setattr */
3800 0, /* tp_reserved */
3801 0, /* tp_repr */
3802 0, /* tp_as_number */
3803 0, /* tp_as_sequence */
3804 0, /* tp_as_mapping */
3805 0, /* tp_hash */
3806 0, /* tp_call */
3807 0, /* tp_str */
3808 0, /* tp_getattro */
3809 0, /* tp_setattro */
3810 0, /* tp_as_buffer */
3811 Py_TPFLAGS_DEFAULT, /* tp_flags */
3812 0, /* tp_doc */
3813 0, /* tp_traverse */
3814 0, /* tp_clear */
3815 0, /* tp_richcompare */
3816 0, /* tp_weaklistoffset */
3817 0, /* tp_iter */
3818 0, /* tp_iternext */
3819 scanner_methods, /* tp_methods */
3820 scanner_members, /* tp_members */
3821 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00003822};
3823
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003824static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06003825pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003826{
3827 /* create search state object */
3828
3829 ScannerObject* self;
3830
3831 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003832 Py_ssize_t start = 0;
3833 Py_ssize_t end = PY_SSIZE_T_MAX;
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06003834 static char* kwlist[] = { "source", "pos", "endpos", NULL };
3835 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:scanner", kwlist,
3836 &string, &start, &end))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003837 return NULL;
3838
3839 /* create scanner object */
3840 self = PyObject_NEW(ScannerObject, &Scanner_Type);
3841 if (!self)
3842 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00003843 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003844
3845 string = state_init(&self->state, pattern, string, start, end);
3846 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00003847 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003848 return NULL;
3849 }
3850
3851 Py_INCREF(pattern);
3852 self->pattern = (PyObject*) pattern;
3853
3854 return (PyObject*) self;
3855}
3856
Guido van Rossumb700df92000-03-31 14:59:30 +00003857static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00003858 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003859 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00003860 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00003861 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00003862};
3863
Martin v. Löwis1a214512008-06-11 05:26:20 +00003864static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03003865 PyModuleDef_HEAD_INIT,
3866 "_" SRE_MODULE,
3867 NULL,
3868 -1,
3869 _functions,
3870 NULL,
3871 NULL,
3872 NULL,
3873 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00003874};
3875
3876PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00003877{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003878 PyObject* m;
3879 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00003880 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003881
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00003882 /* Patch object types */
3883 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
3884 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00003885 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00003886
Martin v. Löwis1a214512008-06-11 05:26:20 +00003887 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00003888 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03003889 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00003890 d = PyModule_GetDict(m);
3891
Christian Heimes217cfd12007-12-02 14:31:20 +00003892 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00003893 if (x) {
3894 PyDict_SetItemString(d, "MAGIC", x);
3895 Py_DECREF(x);
3896 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00003897
Christian Heimes217cfd12007-12-02 14:31:20 +00003898 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00003899 if (x) {
3900 PyDict_SetItemString(d, "CODESIZE", x);
3901 Py_DECREF(x);
3902 }
3903
Neal Norwitzfe537132007-08-26 03:55:15 +00003904 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00003905 if (x) {
3906 PyDict_SetItemString(d, "copyright", x);
3907 Py_DECREF(x);
3908 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00003909 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00003910}
3911
Fredrik Lundh436c3d582000-06-29 08:58:44 +00003912#endif /* !defined(SRE_RECURSIVE) */
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00003913
3914/* vim:ts=4:sw=4:et
3915*/