blob: 55a86c2901818f1c3d366f3927ddaac74094c1cc [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* Version and copyright banner string for the SRE engine. */
static char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
/* -------------------------------------------------------------------- */
/* optional features */

/* enables fast searching */
#define USE_FAST_SEARCH

/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY

/* -------------------------------------------------------------------- */

/* LOCAL(type) declares a file-local function returning `type`; the exact
   storage/inline/calling-convention keywords depend on the compiler. */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#elif defined(USE_INLINE)
#define LOCAL(type) static inline type
#else
#define LOCAL(type) static type
#endif
83
/* error codes (negative status values returned by the engine) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000090
/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and expands
   to nothing otherwise.  Note the double parentheses: the whole printf
   argument list is passed as a single macro argument. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
96
/* -------------------------------------------------------------------- */
/* search engine state */

/* default character predicates (run sre_chars.py to regenerate tables) */

/* bit flags stored per character in sre_char_info[] */
#define SRE_DIGIT_MASK 1
#define SRE_SPACE_MASK 2
#define SRE_LINEBREAK_MASK 4
#define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16
107
/* FIXME: this assumes ASCII.  create tables in init_sre() instead */

/* Category bits for code points 0..127, one entry per character.  Each
   value is an OR of the SRE_*_MASK flags:
      2 = space, 6 = space|linebreak, 16 = word ('_'),
     24 = alnum|word (letters), 25 = digit|alnum|word (digits). */
static char sre_char_info[128] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /*  80 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16,
    /*  96 */ 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 112 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0
};
117
/* ASCII lowercase mapping for code points 0..127: 'A'..'Z' (65..90) map
   to 'a'..'z' (97..122); every other entry maps to itself. */
static char sre_char_lower[128] = {
    /*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    /*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    /*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    /*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    /*  64 */ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /*  80 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92,
              93, 94, 95,
    /*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
              109, 110, 111,
    /* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
              124, 125, 126, 127
};
127
/* ASCII-only predicates: look up the category bit in sre_char_info[] for
   code points below 128 and yield 0 for anything else.  The result is the
   masked bit (nonzero/zero), not necessarily 1/0.  `ch` is evaluated
   twice, so it must not have side effects. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
#define SRE_IS_LINEBREAK(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
#define SRE_IS_WORD(ch)\
    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
Guido van Rossumb700df92000-03-31 14:59:30 +0000138
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000139static unsigned int sre_lower(unsigned int ch)
140{
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000141 return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000142}
143
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* Only values 0..255 are passed to isalnum(); anything larger yields 0.
   `ch` is evaluated more than once. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
149
/* Lowercase `ch` with the C library's tolower() (locale dependent) when it
   is below 256; larger code points are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
154
/* unicode-specific character predicates */

/* Thin wrappers over the Py_UNICODE_* classification macros; a "word"
   character is an alphanumeric character or '_'.  SRE_UNI_IS_WORD
   evaluates `ch` more than once. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000162
/* Lowercase `ch` using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
167
Guido van Rossumb700df92000-03-31 14:59:30 +0000168LOCAL(int)
169sre_category(SRE_CODE category, unsigned int ch)
170{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000172
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000173 case SRE_CATEGORY_DIGIT:
174 return SRE_IS_DIGIT(ch);
175 case SRE_CATEGORY_NOT_DIGIT:
176 return !SRE_IS_DIGIT(ch);
177 case SRE_CATEGORY_SPACE:
178 return SRE_IS_SPACE(ch);
179 case SRE_CATEGORY_NOT_SPACE:
180 return !SRE_IS_SPACE(ch);
181 case SRE_CATEGORY_WORD:
182 return SRE_IS_WORD(ch);
183 case SRE_CATEGORY_NOT_WORD:
184 return !SRE_IS_WORD(ch);
185 case SRE_CATEGORY_LINEBREAK:
186 return SRE_IS_LINEBREAK(ch);
187 case SRE_CATEGORY_NOT_LINEBREAK:
188 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000189
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000190 case SRE_CATEGORY_LOC_WORD:
191 return SRE_LOC_IS_WORD(ch);
192 case SRE_CATEGORY_LOC_NOT_WORD:
193 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000194
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 case SRE_CATEGORY_UNI_DIGIT:
196 return SRE_UNI_IS_DIGIT(ch);
197 case SRE_CATEGORY_UNI_NOT_DIGIT:
198 return !SRE_UNI_IS_DIGIT(ch);
199 case SRE_CATEGORY_UNI_SPACE:
200 return SRE_UNI_IS_SPACE(ch);
201 case SRE_CATEGORY_UNI_NOT_SPACE:
202 return !SRE_UNI_IS_SPACE(ch);
203 case SRE_CATEGORY_UNI_WORD:
204 return SRE_UNI_IS_WORD(ch);
205 case SRE_CATEGORY_UNI_NOT_WORD:
206 return !SRE_UNI_IS_WORD(ch);
207 case SRE_CATEGORY_UNI_LINEBREAK:
208 return SRE_UNI_IS_LINEBREAK(ch);
209 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
210 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000211 }
212 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000213}
214
215/* helpers */
216
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000217static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000218data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000219{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000220 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000222 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000223 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000224 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225}
226
227static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000228data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000229{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000230 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000231 minsize = state->data_stack_base+size;
232 cursize = state->data_stack_size;
233 if (cursize < minsize) {
234 void* stack;
235 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300236 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000237 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000238 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000239 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000240 return SRE_ERROR_MEMORY;
241 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000242 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000243 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000244 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000245 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000246}
247
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000248/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000249
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300250#define SRE_CHAR Py_UCS1
251#define SIZEOF_SRE_CHAR 1
252#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300253#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000254
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300255/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000256
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300257#define SRE_CHAR Py_UCS2
258#define SIZEOF_SRE_CHAR 2
259#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300260#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000261
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300262/* generate 32-bit unicode version */
263
264#define SRE_CHAR Py_UCS4
265#define SIZEOF_SRE_CHAR 4
266#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300267#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000268
269/* -------------------------------------------------------------------- */
270/* factories and destructors */
271
272/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100273static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600274static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
Guido van Rossumb700df92000-03-31 14:59:30 +0000275
276static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000277sre_codesize(PyObject* self, PyObject *unused)
Guido van Rossumb700df92000-03-31 14:59:30 +0000278{
Antoine Pitrou43fb54c2012-12-02 12:52:36 +0100279 return PyLong_FromSize_t(sizeof(SRE_CODE));
Guido van Rossumb700df92000-03-31 14:59:30 +0000280}
281
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000282static PyObject *
Fredrik Lundhb389df32000-06-29 12:48:37 +0000283sre_getlower(PyObject* self, PyObject* args)
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000284{
285 int character, flags;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000286 if (!PyArg_ParseTuple(args, "ii", &character, &flags))
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000287 return NULL;
288 if (flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000289 return Py_BuildValue("i", sre_lower_locale(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000290 if (flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000291 return Py_BuildValue("i", sre_lower_unicode(character));
Fredrik Lundhb389df32000-06-29 12:48:37 +0000292 return Py_BuildValue("i", sre_lower(character));
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000293}
294
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000295LOCAL(void)
296state_reset(SRE_STATE* state)
297{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000298 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000299 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000300
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000301 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000302 state->lastindex = -1;
303
304 state->repeat = NULL;
305
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000306 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307}
308
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000309static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200310getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300311 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600312 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000313{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000314 /* given a python object, return a data pointer, a length (in
315 characters), and a character size. return NULL if the object
316 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000317
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000318 /* Unicode objects do not support the buffer API. So, get the data
319 directly instead. */
320 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200321 if (PyUnicode_READY(string) == -1)
322 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200324 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300325 *p_isbytes = 0;
326 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 }
328
Victor Stinner0058b862011-09-29 03:27:47 +0200329 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300330 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
331 PyErr_SetString(PyExc_TypeError, "expected string or buffer");
332 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000333 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000334
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300335 *p_length = view->len;
336 *p_charsize = 1;
337 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000338
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (view->buf == NULL) {
340 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
341 PyBuffer_Release(view);
342 view->buf = NULL;
343 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000344 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300345 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000346}
347
348LOCAL(PyObject*)
349state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000350 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000351{
352 /* prepare state object */
353
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000354 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300355 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000356 void* ptr;
357
358 memset(state, 0, sizeof(SRE_STATE));
359
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000360 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000361 state->lastindex = -1;
362
Benjamin Petersone48944b2012-03-07 14:50:25 -0600363 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600366 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000367
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300368 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600369 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300370 "can't use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600371 goto err;
372 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300373 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600374 PyErr_SetString(PyExc_TypeError,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300375 "can't use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600376 goto err;
377 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000378
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000379 /* adjust boundaries */
380 if (start < 0)
381 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000382 else if (start > length)
383 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000384
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 if (end < 0)
386 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000387 else if (end > length)
388 end = length;
389
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000391 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000394
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000395 state->start = (void*) ((char*) ptr + start * state->charsize);
396 state->end = (void*) ((char*) ptr + end * state->charsize);
397
398 Py_INCREF(string);
399 state->string = string;
400 state->pos = start;
401 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000402
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000403 if (pattern->flags & SRE_FLAG_LOCALE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000404 state->lower = sre_lower_locale;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000405 else if (pattern->flags & SRE_FLAG_UNICODE)
Fredrik Lundhb389df32000-06-29 12:48:37 +0000406 state->lower = sre_lower_unicode;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000407 else
Fredrik Lundhb389df32000-06-29 12:48:37 +0000408 state->lower = sre_lower;
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000409
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000410 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600411 err:
412 if (state->buffer.buf)
413 PyBuffer_Release(&state->buffer);
414 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000415}
416
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000417LOCAL(void)
418state_fini(SRE_STATE* state)
419{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 if (state->buffer.buf)
421 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000422 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000423 data_stack_dealloc(state);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000424}
425
/* calculate offset from start of string */
/* Converts the pointer `member` into a character index: the byte distance
   from state->beginning divided by the character size. */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
429
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000430LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300431getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300432 PyObject* string, Py_ssize_t start, Py_ssize_t end)
433{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300434 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300435 if (PyBytes_CheckExact(string) &&
436 start == 0 && end == PyBytes_GET_SIZE(string)) {
437 Py_INCREF(string);
438 return string;
439 }
440 return PyBytes_FromStringAndSize(
441 (const char *)ptr + start, end - start);
442 }
443 else {
444 return PyUnicode_Substring(string, start, end);
445 }
446}
447
448LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000449state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000450{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000451 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000452
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000453 index = (index - 1) * 2;
454
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000455 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000456 if (empty)
457 /* want empty string */
458 i = j = 0;
459 else {
460 Py_INCREF(Py_None);
461 return Py_None;
462 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000463 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000464 i = STATE_OFFSET(state, state->mark[index]);
465 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000466 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000467
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300468 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469}
470
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000471static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100472pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000473{
474 switch (status) {
475 case SRE_ERROR_RECURSION_LIMIT:
476 PyErr_SetString(
477 PyExc_RuntimeError,
478 "maximum recursion limit exceeded"
479 );
480 break;
481 case SRE_ERROR_MEMORY:
482 PyErr_NoMemory();
483 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000484 case SRE_ERROR_INTERRUPTED:
485 /* An exception has already been raised, so let it fly */
486 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000487 default:
488 /* other error codes indicate compiler/engine bugs */
489 PyErr_SetString(
490 PyExc_RuntimeError,
491 "internal error in regular expression engine"
492 );
493 }
494}
495
Guido van Rossumb700df92000-03-31 14:59:30 +0000496static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000497pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000498{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000499 if (self->weakreflist != NULL)
500 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000501 Py_XDECREF(self->pattern);
502 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000503 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000504 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000505}
506
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300507LOCAL(Py_ssize_t)
508sre_match(SRE_STATE* state, SRE_CODE* pattern)
509{
510 if (state->charsize == 1)
511 return sre_ucs1_match(state, pattern);
512 if (state->charsize == 2)
513 return sre_ucs2_match(state, pattern);
514 assert(state->charsize == 4);
515 return sre_ucs4_match(state, pattern);
516}
517
518LOCAL(Py_ssize_t)
519sre_search(SRE_STATE* state, SRE_CODE* pattern)
520{
521 if (state->charsize == 1)
522 return sre_ucs1_search(state, pattern);
523 if (state->charsize == 2)
524 return sre_ucs2_search(state, pattern);
525 assert(state->charsize == 4);
526 return sre_ucs4_search(state, pattern);
527}
528
Guido van Rossumb700df92000-03-31 14:59:30 +0000529static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000530pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000531{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000532 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100533 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000534
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000535 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000536 Py_ssize_t start = 0;
537 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000538 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000539 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000540 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000541 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000542
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000543 string = state_init(&state, self, string, start, end);
544 if (!string)
545 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000546
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000547 state.ptr = state.start;
548
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000549 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
550
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300551 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000552
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000553 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Thomas Wouters89f507f2006-12-13 04:49:30 +0000554 if (PyErr_Occurred())
555 return NULL;
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000556
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000557 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000558
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000559 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000560}
561
562static PyObject*
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200563pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
564{
565 SRE_STATE state;
566 Py_ssize_t status;
567
568 PyObject* string;
569 Py_ssize_t start = 0;
570 Py_ssize_t end = PY_SSIZE_T_MAX;
571 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
572 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist,
573 &string, &start, &end))
574 return NULL;
575
576 string = state_init(&state, self, string, start, end);
577 if (!string)
578 return NULL;
579
580 state.match_all = 1;
581 state.ptr = state.start;
582
583 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
584
585 status = sre_match(&state, PatternObject_GetCode(self));
586
587 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
588 if (PyErr_Occurred())
589 return NULL;
590
591 state_fini(&state);
592
593 return pattern_new_match(self, &state, status);
594}
595
596static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000597pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000598{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000599 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100600 Py_ssize_t status;
Guido van Rossumb700df92000-03-31 14:59:30 +0000601
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000602 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000603 Py_ssize_t start = 0;
604 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000605 static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000606 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000607 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000608 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000609
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000610 string = state_init(&state, self, string, start, end);
611 if (!string)
612 return NULL;
613
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000614 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
615
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300616 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000617
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000618 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
619
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000620 state_fini(&state);
Guido van Rossumb700df92000-03-31 14:59:30 +0000621
Thomas Wouters89f507f2006-12-13 04:49:30 +0000622 if (PyErr_Occurred())
623 return NULL;
624
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000625 return pattern_new_match(self, &state, status);
Guido van Rossumb700df92000-03-31 14:59:30 +0000626}
627
628static PyObject*
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000629call(char* module, char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000630{
631 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000632 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000633 PyObject* func;
634 PyObject* result;
635
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000636 if (!args)
637 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000638 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000639 if (!name)
640 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000641 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000642 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000643 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000644 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000645 func = PyObject_GetAttrString(mod, function);
646 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000647 if (!func)
648 return NULL;
649 result = PyObject_CallObject(func, args);
650 Py_DECREF(func);
651 Py_DECREF(args);
652 return result;
653}
654
#ifdef USE_BUILTIN_COPY
/* Replace *object by the result of copy.deepcopy(*object, memo).

   On success the old reference held in *object is released and the slot
   owns the new copy.  A slot holding NULL is left untouched and counts as
   success.  Returns 1 on success and 0 on failure (exception set, slot
   unchanged). */
static int
deepcopy(PyObject** object, PyObject* memo)
{
    PyObject* copy;

    /* unset slot: nothing to copy, and PyTuple_Pack() must not be handed
       a NULL item */
    if (!*object)
        return 1;

    copy = call(
        "copy", "deepcopy",
        PyTuple_Pack(2, *object, memo)
        );
    if (!copy)
        return 0;

    Py_DECREF(*object);
    *object = copy;

    return 1; /* success */
}
#endif
674
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000675static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +0000676pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +0000677{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 SRE_STATE state;
679 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100680 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000681 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000682
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000683 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000684 Py_ssize_t start = 0;
685 Py_ssize_t end = PY_SSIZE_T_MAX;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000686 static char* kwlist[] = { "source", "pos", "endpos", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000687 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
Fredrik Lundh562586e2000-10-03 20:43:34 +0000688 &string, &start, &end))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000690
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000691 string = state_init(&state, self, string, start, end);
692 if (!string)
693 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000694
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000696 if (!list) {
697 state_fini(&state);
698 return NULL;
699 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000700
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000701 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000702
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000703 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000704
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000705 state_reset(&state);
706
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000707 state.ptr = state.start;
708
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300709 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300710 if (PyErr_Occurred())
711 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000712
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000713 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000714 if (status == 0)
715 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000716 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000717 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000718 }
Tim Peters3d563502006-01-21 02:47:53 +0000719
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000720 /* don't bother to build a match object */
721 switch (self->groups) {
722 case 0:
723 b = STATE_OFFSET(&state, state.start);
724 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300725 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300726 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000727 if (!item)
728 goto error;
729 break;
730 case 1:
731 item = state_getslice(&state, 1, string, 1);
732 if (!item)
733 goto error;
734 break;
735 default:
736 item = PyTuple_New(self->groups);
737 if (!item)
738 goto error;
739 for (i = 0; i < self->groups; i++) {
740 PyObject* o = state_getslice(&state, i+1, string, 1);
741 if (!o) {
742 Py_DECREF(item);
743 goto error;
744 }
745 PyTuple_SET_ITEM(item, i, o);
746 }
747 break;
748 }
749
750 status = PyList_Append(list, item);
751 Py_DECREF(item);
752 if (status < 0)
753 goto error;
754
755 if (state.ptr == state.start)
756 state.start = (void*) ((char*) state.ptr + state.charsize);
757 else
758 state.start = state.ptr;
759
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000760 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000761
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 state_fini(&state);
763 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000764
765error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000766 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000767 state_fini(&state);
768 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000769
Guido van Rossumb700df92000-03-31 14:59:30 +0000770}
771
Fredrik Lundh703ce812001-10-24 22:16:30 +0000772static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600773pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
Fredrik Lundh703ce812001-10-24 22:16:30 +0000774{
775 PyObject* scanner;
776 PyObject* search;
777 PyObject* iterator;
778
Sean Reifschneider7b3c9752012-03-12 18:22:38 -0600779 scanner = pattern_scanner(pattern, args, kw);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000780 if (!scanner)
781 return NULL;
782
783 search = PyObject_GetAttrString(scanner, "search");
784 Py_DECREF(scanner);
785 if (!search)
786 return NULL;
787
788 iterator = PyCallIter_New(search, Py_None);
789 Py_DECREF(search);
790
791 return iterator;
792}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000793
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000794static PyObject*
795pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
796{
797 SRE_STATE state;
798 PyObject* list;
799 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100800 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000801 Py_ssize_t n;
802 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000803 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000804
805 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000806 Py_ssize_t maxsplit = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +0000807 static char* kwlist[] = { "source", "maxsplit", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000808 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000809 &string, &maxsplit))
810 return NULL;
811
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000812 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000813 if (!string)
814 return NULL;
815
816 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000817 if (!list) {
818 state_fini(&state);
819 return NULL;
820 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000821
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000822 n = 0;
823 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000824
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000825 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000826
827 state_reset(&state);
828
829 state.ptr = state.start;
830
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300831 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300832 if (PyErr_Occurred())
833 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000834
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000835 if (status <= 0) {
836 if (status == 0)
837 break;
838 pattern_error(status);
839 goto error;
840 }
Tim Peters3d563502006-01-21 02:47:53 +0000841
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000842 if (state.start == state.ptr) {
843 if (last == state.end)
844 break;
845 /* skip one character */
846 state.start = (void*) ((char*) state.ptr + state.charsize);
847 continue;
848 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000849
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000850 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300851 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000852 string, STATE_OFFSET(&state, last),
853 STATE_OFFSET(&state, state.start)
854 );
855 if (!item)
856 goto error;
857 status = PyList_Append(list, item);
858 Py_DECREF(item);
859 if (status < 0)
860 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000861
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000862 /* add groups (if any) */
863 for (i = 0; i < self->groups; i++) {
864 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000865 if (!item)
866 goto error;
867 status = PyList_Append(list, item);
868 Py_DECREF(item);
869 if (status < 0)
870 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000871 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000872
873 n = n + 1;
874
875 last = state.start = state.ptr;
876
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000877 }
878
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000879 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300880 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000881 string, STATE_OFFSET(&state, last), state.endpos
882 );
883 if (!item)
884 goto error;
885 status = PyList_Append(list, item);
886 Py_DECREF(item);
887 if (status < 0)
888 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000889
890 state_fini(&state);
891 return list;
892
893error:
894 Py_DECREF(list);
895 state_fini(&state);
896 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000897
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000898}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000899
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000900static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000901pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000902 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000903{
904 SRE_STATE state;
905 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300906 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000907 PyObject* item;
908 PyObject* filter;
909 PyObject* args;
910 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000911 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100912 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000913 Py_ssize_t n;
914 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300915 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000916 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600917 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000918
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000919 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000920 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000921 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000922 Py_INCREF(filter);
923 filter_is_callable = 1;
924 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000925 /* if not callable, check if it's a literal string */
926 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600927 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300928 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200929 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000930 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300931 if (charsize == 1)
932 literal = memchr(ptr, '\\', n) == NULL;
933 else
934 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000935 } else {
936 PyErr_Clear();
937 literal = 0;
938 }
Benjamin Petersone48944b2012-03-07 14:50:25 -0600939 if (view.buf)
940 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000941 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000942 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000943 Py_INCREF(filter);
944 filter_is_callable = 0;
945 } else {
946 /* not a literal; hand it over to the template compiler */
947 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +0000948 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000949 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000950 );
951 if (!filter)
952 return NULL;
953 filter_is_callable = PyCallable_Check(filter);
954 }
Fredrik Lundhdac58492001-10-21 21:48:30 +0000955 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000956
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000957 string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
Fredrik Lundh82b23072001-12-09 16:13:15 +0000958 if (!string) {
959 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000960 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +0000961 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000962
963 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000964 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +0000965 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000966 state_fini(&state);
967 return NULL;
968 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000969
970 n = i = 0;
971
972 while (!count || n < count) {
973
974 state_reset(&state);
975
976 state.ptr = state.start;
977
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300978 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300979 if (PyErr_Occurred())
980 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000981
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000982 if (status <= 0) {
983 if (status == 0)
984 break;
985 pattern_error(status);
986 goto error;
987 }
Tim Peters3d563502006-01-21 02:47:53 +0000988
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000989 b = STATE_OFFSET(&state, state.start);
990 e = STATE_OFFSET(&state, state.ptr);
991
992 if (i < b) {
993 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300994 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300995 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000996 if (!item)
997 goto error;
998 status = PyList_Append(list, item);
999 Py_DECREF(item);
1000 if (status < 0)
1001 goto error;
1002
1003 } else if (i == b && i == e && n > 0)
1004 /* ignore empty match on latest position */
1005 goto next;
1006
1007 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001008 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001009 match = pattern_new_match(self, &state, 1);
1010 if (!match)
1011 goto error;
Raymond Hettinger8ae46892003-10-12 19:09:37 +00001012 args = PyTuple_Pack(1, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001013 if (!args) {
Guido van Rossum4e173842001-12-07 04:25:10 +00001014 Py_DECREF(match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001015 goto error;
1016 }
1017 item = PyObject_CallObject(filter, args);
1018 Py_DECREF(args);
1019 Py_DECREF(match);
1020 if (!item)
1021 goto error;
1022 } else {
1023 /* filter is literal string */
1024 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001025 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001026 }
1027
1028 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001029 if (item != Py_None) {
1030 status = PyList_Append(list, item);
1031 Py_DECREF(item);
1032 if (status < 0)
1033 goto error;
1034 }
Tim Peters3d563502006-01-21 02:47:53 +00001035
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001036 i = e;
1037 n = n + 1;
1038
1039next:
1040 /* move on */
1041 if (state.ptr == state.start)
1042 state.start = (void*) ((char*) state.ptr + state.charsize);
1043 else
1044 state.start = state.ptr;
1045
1046 }
1047
1048 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001049 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001050 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001051 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001052 if (!item)
1053 goto error;
1054 status = PyList_Append(list, item);
1055 Py_DECREF(item);
1056 if (status < 0)
1057 goto error;
1058 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001059
1060 state_fini(&state);
1061
Guido van Rossum4e173842001-12-07 04:25:10 +00001062 Py_DECREF(filter);
1063
Fredrik Lundhdac58492001-10-21 21:48:30 +00001064 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001065 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001066 if (!joiner) {
1067 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001068 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001069 }
1070 if (PyList_GET_SIZE(list) == 0) {
1071 Py_DECREF(list);
1072 item = joiner;
1073 }
1074 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001075 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001076 item = _PyBytes_Join(joiner, list);
1077 else
1078 item = PyUnicode_Join(joiner, list);
1079 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001080 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001081 if (!item)
1082 return NULL;
1083 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001084
1085 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001086 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001087
1088 return item;
1089
1090error:
1091 Py_DECREF(list);
1092 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001093 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001094 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001095
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001096}
1097
1098static PyObject*
1099pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
1100{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001101 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001102 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001103 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001104 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001105 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001106 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001107 return NULL;
1108
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001109 return pattern_subx(self, ptemplate, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110}
1111
1112static PyObject*
1113pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
1114{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001115 PyObject* ptemplate;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001116 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001117 Py_ssize_t count = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001118 static char* kwlist[] = { "repl", "string", "count", NULL };
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001119 if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001120 &ptemplate, &string, &count))
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001121 return NULL;
1122
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001123 return pattern_subx(self, ptemplate, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001124}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001125
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001126static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001127pattern_copy(PatternObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001128{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001129#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001130 PatternObject* copy;
1131 int offset;
1132
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001133 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1134 if (!copy)
1135 return NULL;
1136
1137 offset = offsetof(PatternObject, groups);
1138
1139 Py_XINCREF(self->groupindex);
1140 Py_XINCREF(self->indexgroup);
1141 Py_XINCREF(self->pattern);
1142
1143 memcpy((char*) copy + offset, (char*) self + offset,
1144 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001145 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001146
1147 return (PyObject*) copy;
1148#else
1149 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1150 return NULL;
1151#endif
1152}
1153
1154static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001155pattern_deepcopy(PatternObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001156{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001157#ifdef USE_BUILTIN_COPY
1158 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001159
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001160 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001161 if (!copy)
1162 return NULL;
1163
1164 if (!deepcopy(&copy->groupindex, memo) ||
1165 !deepcopy(&copy->indexgroup, memo) ||
1166 !deepcopy(&copy->pattern, memo)) {
1167 Py_DECREF(copy);
1168 return NULL;
1169 }
1170
1171#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001172 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1173 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001174#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001175}
1176
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001177static PyObject *
1178pattern_repr(PatternObject *obj)
1179{
1180 static const struct {
1181 const char *name;
1182 int value;
1183 } flag_names[] = {
1184 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1185 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1186 {"re.LOCALE", SRE_FLAG_LOCALE},
1187 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1188 {"re.DOTALL", SRE_FLAG_DOTALL},
1189 {"re.UNICODE", SRE_FLAG_UNICODE},
1190 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1191 {"re.DEBUG", SRE_FLAG_DEBUG},
1192 {"re.ASCII", SRE_FLAG_ASCII},
1193 };
1194 PyObject *result = NULL;
1195 PyObject *flag_items;
1196 int i;
1197 int flags = obj->flags;
1198
1199 /* Omit re.UNICODE for valid string patterns. */
1200 if (obj->isbytes == 0 &&
1201 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1202 SRE_FLAG_UNICODE)
1203 flags &= ~SRE_FLAG_UNICODE;
1204
1205 flag_items = PyList_New(0);
1206 if (!flag_items)
1207 return NULL;
1208
1209 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1210 if (flags & flag_names[i].value) {
1211 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1212 if (!item)
1213 goto done;
1214
1215 if (PyList_Append(flag_items, item) < 0) {
1216 Py_DECREF(item);
1217 goto done;
1218 }
1219 Py_DECREF(item);
1220 flags &= ~flag_names[i].value;
1221 }
1222 }
1223 if (flags) {
1224 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1225 if (!item)
1226 goto done;
1227
1228 if (PyList_Append(flag_items, item) < 0) {
1229 Py_DECREF(item);
1230 goto done;
1231 }
1232 Py_DECREF(item);
1233 }
1234
1235 if (PyList_Size(flag_items) > 0) {
1236 PyObject *flags_result;
1237 PyObject *sep = PyUnicode_FromString("|");
1238 if (!sep)
1239 goto done;
1240 flags_result = PyUnicode_Join(sep, flag_items);
1241 Py_DECREF(sep);
1242 if (!flags_result)
1243 goto done;
1244 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1245 obj->pattern, flags_result);
1246 Py_DECREF(flags_result);
1247 }
1248 else {
1249 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1250 }
1251
1252done:
1253 Py_DECREF(flag_items);
1254 return result;
1255}
1256
/* Docstrings for the pattern type and its methods.  The names defined
   here are the ones referenced by the pattern_methods[] table and by the
   tp_doc slot of Pattern_Type.
   NOTE(review): these signatures call the subject argument "string",
   while the kwlist tables name it "pattern" (pattern_search) and
   "source" (pattern_findall, pattern_split) -- confirm which keyword
   spelling is intended. */
Raymond Hettinger94478742004-09-24 04:31:19 +00001257PyDoc_STRVAR(pattern_match_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001258"match(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001259 Matches zero or more characters at the beginning of the string");
1260
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001261PyDoc_STRVAR(pattern_fullmatch_doc,
1262"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
1263 Matches against all of the string");
1264
Raymond Hettinger94478742004-09-24 04:31:19 +00001265PyDoc_STRVAR(pattern_search_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001266"search(string[, pos[, endpos]]) -> match object or None.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001267 Scan through string looking for a match, and return a corresponding\n\
Andrew Svetlov0b64c142012-12-25 18:48:54 +02001268 match object instance. Return None if no position in the string matches.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001269
1270PyDoc_STRVAR(pattern_split_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001271"split(string[, maxsplit = 0]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001272 Split string by the occurrences of pattern.");
1273
1274PyDoc_STRVAR(pattern_findall_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001275"findall(string[, pos[, endpos]]) -> list.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001276 Return a list of all non-overlapping matches of pattern in string.");
1277
1278PyDoc_STRVAR(pattern_finditer_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001279"finditer(string[, pos[, endpos]]) -> iterator.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001280 Return an iterator over all non-overlapping matches for the \n\
1281 RE pattern in string. For each match, the iterator returns a\n\
1282 match object.");
1283
1284PyDoc_STRVAR(pattern_sub_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001285"sub(repl, string[, count = 0]) -> newstring.\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001286 Return the string obtained by replacing the leftmost non-overlapping\n\
Tim Peters3d563502006-01-21 02:47:53 +00001287 occurrences of pattern in string by the replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001288
1289PyDoc_STRVAR(pattern_subn_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02001290"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
Raymond Hettinger94478742004-09-24 04:31:19 +00001291 Return the tuple (new_string, number_of_subs_made) found by replacing\n\
1292 the leftmost non-overlapping occurrences of pattern with the\n\
Tim Peters3d563502006-01-21 02:47:53 +00001293 replacement repl.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001294
1295PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1296
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001297static PyMethodDef pattern_methods[] = {
Tim Peters3d563502006-01-21 02:47:53 +00001298 {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001299 pattern_match_doc},
Serhiy Storchaka32eddc12013-11-23 23:20:30 +02001300 {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
1301 pattern_fullmatch_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001302 {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001303 pattern_search_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001304 {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001305 pattern_sub_doc},
Raymond Hettinger94478742004-09-24 04:31:19 +00001306 {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001307 pattern_subn_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001308 {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001309 pattern_split_doc},
Tim Peters3d563502006-01-21 02:47:53 +00001310 {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001311 pattern_findall_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001312 {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001313 pattern_finditer_doc},
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06001314 {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001315 {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
1316 {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001317 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00001318};
1319
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00001320#define PAT_OFF(x) offsetof(PatternObject, x)
1321static PyMemberDef pattern_members[] = {
1322 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
1323 {"flags", T_INT, PAT_OFF(flags), READONLY},
1324 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
1325 {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
1326 {NULL} /* Sentinel */
1327};
Guido van Rossumb700df92000-03-31 14:59:30 +00001328
Neal Norwitz57c179c2006-03-22 07:18:02 +00001329static PyTypeObject Pattern_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001330 PyVarObject_HEAD_INIT(NULL, 0)
1331 "_" SRE_MODULE ".SRE_Pattern",
Fredrik Lundh6f013982000-07-03 18:44:21 +00001332 sizeof(PatternObject), sizeof(SRE_CODE),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001333 (destructor)pattern_dealloc, /* tp_dealloc */
1334 0, /* tp_print */
1335 0, /* tp_getattr */
1336 0, /* tp_setattr */
1337 0, /* tp_reserved */
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001338 (reprfunc)pattern_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001339 0, /* tp_as_number */
1340 0, /* tp_as_sequence */
1341 0, /* tp_as_mapping */
1342 0, /* tp_hash */
1343 0, /* tp_call */
1344 0, /* tp_str */
1345 0, /* tp_getattro */
1346 0, /* tp_setattro */
1347 0, /* tp_as_buffer */
1348 Py_TPFLAGS_DEFAULT, /* tp_flags */
1349 pattern_doc, /* tp_doc */
1350 0, /* tp_traverse */
1351 0, /* tp_clear */
1352 0, /* tp_richcompare */
1353 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
1354 0, /* tp_iter */
1355 0, /* tp_iternext */
1356 pattern_methods, /* tp_methods */
1357 pattern_members, /* tp_members */
Guido van Rossumb700df92000-03-31 14:59:30 +00001358};
1359
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001360static int _validate(PatternObject *self); /* Forward */
1361
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001362static PyObject *
1363_compile(PyObject* self_, PyObject* args)
1364{
1365 /* "compile" pattern descriptor to pattern object */
1366
1367 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001368 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001369
1370 PyObject* pattern;
1371 int flags = 0;
1372 PyObject* code;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001373 Py_ssize_t groups = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001374 PyObject* groupindex = NULL;
1375 PyObject* indexgroup = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001376
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001377 if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001378 &PyList_Type, &code, &groups,
1379 &groupindex, &indexgroup))
1380 return NULL;
1381
1382 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001383 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001384 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1385 if (!self)
1386 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001387 self->weakreflist = NULL;
1388 self->pattern = NULL;
1389 self->groupindex = NULL;
1390 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001391
1392 self->codesize = n;
1393
1394 for (i = 0; i < n; i++) {
1395 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001396 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001397 self->code[i] = (SRE_CODE) value;
1398 if ((unsigned long) self->code[i] != value) {
1399 PyErr_SetString(PyExc_OverflowError,
1400 "regular expression code size limit exceeded");
1401 break;
1402 }
1403 }
1404
1405 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001406 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001407 return NULL;
1408 }
1409
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001410 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001411 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001412 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001413 else {
1414 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001415 int charsize;
1416 Py_buffer view;
1417 view.buf = NULL;
1418 if (!getstring(pattern, &p_length, &self->isbytes,
1419 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001420 Py_DECREF(self);
1421 return NULL;
1422 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001423 if (view.buf)
1424 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001425 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001426
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001427 Py_INCREF(pattern);
1428 self->pattern = pattern;
1429
1430 self->flags = flags;
1431
1432 self->groups = groups;
1433
1434 Py_XINCREF(groupindex);
1435 self->groupindex = groupindex;
1436
1437 Py_XINCREF(indexgroup);
1438 self->indexgroup = indexgroup;
1439
1440 self->weakreflist = NULL;
1441
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001442 if (!_validate(self)) {
1443 Py_DECREF(self);
1444 return NULL;
1445 }
1446
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001447 return (PyObject*) self;
1448}
1449
Guido van Rossumb700df92000-03-31 14:59:30 +00001450/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001451/* Code validation */
1452
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1474
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator; takes a parenthesised printf argument
   list, e.g. VTRACE(("x=%d\n", x)) */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the enclosing function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros expect the enclosing function to have `code` and `end`
   pointers plus `op`, `arg` and `skip` variables in scope, and they FAIL
   when the read would go past `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip word and check that skip-adj words remain after the skip
   word's own position.  adj is parenthesised so that an expression
   argument is subtracted as a whole. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (Py_uintptr_t)(end - code))    \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001515
1516static int
1517_validate_charset(SRE_CODE *code, SRE_CODE *end)
1518{
1519 /* Some variables are manipulated by the macros above */
1520 SRE_CODE op;
1521 SRE_CODE arg;
1522 SRE_CODE offset;
1523 int i;
1524
1525 while (code < end) {
1526 GET_OP;
1527 switch (op) {
1528
1529 case SRE_OP_NEGATE:
1530 break;
1531
1532 case SRE_OP_LITERAL:
1533 GET_ARG;
1534 break;
1535
1536 case SRE_OP_RANGE:
1537 GET_ARG;
1538 GET_ARG;
1539 break;
1540
1541 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001542 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001543 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001544 FAIL;
1545 code += offset;
1546 break;
1547
1548 case SRE_OP_BIGCHARSET:
1549 GET_ARG; /* Number of blocks */
1550 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001551 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001552 FAIL;
1553 /* Make sure that each byte points to a valid block */
1554 for (i = 0; i < 256; i++) {
1555 if (((unsigned char *)code)[i] >= arg)
1556 FAIL;
1557 }
1558 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001559 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001560 if (offset > (Py_uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001561 FAIL;
1562 code += offset;
1563 break;
1564
1565 case SRE_OP_CATEGORY:
1566 GET_ARG;
1567 switch (arg) {
1568 case SRE_CATEGORY_DIGIT:
1569 case SRE_CATEGORY_NOT_DIGIT:
1570 case SRE_CATEGORY_SPACE:
1571 case SRE_CATEGORY_NOT_SPACE:
1572 case SRE_CATEGORY_WORD:
1573 case SRE_CATEGORY_NOT_WORD:
1574 case SRE_CATEGORY_LINEBREAK:
1575 case SRE_CATEGORY_NOT_LINEBREAK:
1576 case SRE_CATEGORY_LOC_WORD:
1577 case SRE_CATEGORY_LOC_NOT_WORD:
1578 case SRE_CATEGORY_UNI_DIGIT:
1579 case SRE_CATEGORY_UNI_NOT_DIGIT:
1580 case SRE_CATEGORY_UNI_SPACE:
1581 case SRE_CATEGORY_UNI_NOT_SPACE:
1582 case SRE_CATEGORY_UNI_WORD:
1583 case SRE_CATEGORY_UNI_NOT_WORD:
1584 case SRE_CATEGORY_UNI_LINEBREAK:
1585 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1586 break;
1587 default:
1588 FAIL;
1589 }
1590 break;
1591
1592 default:
1593 FAIL;
1594
1595 }
1596 }
1597
1598 return 1;
1599}
1600
1601static int
1602_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1603{
1604 /* Some variables are manipulated by the macros above */
1605 SRE_CODE op;
1606 SRE_CODE arg;
1607 SRE_CODE skip;
1608
1609 VTRACE(("code=%p, end=%p\n", code, end));
1610
1611 if (code > end)
1612 FAIL;
1613
1614 while (code < end) {
1615 GET_OP;
1616 switch (op) {
1617
1618 case SRE_OP_MARK:
1619 /* We don't check whether marks are properly nested; the
1620 sre_match() code is robust even if they don't, and the worst
1621 you can get is nonsensical match results. */
1622 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001623 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001624 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1625 FAIL;
1626 }
1627 break;
1628
1629 case SRE_OP_LITERAL:
1630 case SRE_OP_NOT_LITERAL:
1631 case SRE_OP_LITERAL_IGNORE:
1632 case SRE_OP_NOT_LITERAL_IGNORE:
1633 GET_ARG;
1634 /* The arg is just a character, nothing to check */
1635 break;
1636
1637 case SRE_OP_SUCCESS:
1638 case SRE_OP_FAILURE:
1639 /* Nothing to check; these normally end the matching process */
1640 break;
1641
1642 case SRE_OP_AT:
1643 GET_ARG;
1644 switch (arg) {
1645 case SRE_AT_BEGINNING:
1646 case SRE_AT_BEGINNING_STRING:
1647 case SRE_AT_BEGINNING_LINE:
1648 case SRE_AT_END:
1649 case SRE_AT_END_LINE:
1650 case SRE_AT_END_STRING:
1651 case SRE_AT_BOUNDARY:
1652 case SRE_AT_NON_BOUNDARY:
1653 case SRE_AT_LOC_BOUNDARY:
1654 case SRE_AT_LOC_NON_BOUNDARY:
1655 case SRE_AT_UNI_BOUNDARY:
1656 case SRE_AT_UNI_NON_BOUNDARY:
1657 break;
1658 default:
1659 FAIL;
1660 }
1661 break;
1662
1663 case SRE_OP_ANY:
1664 case SRE_OP_ANY_ALL:
1665 /* These have no operands */
1666 break;
1667
1668 case SRE_OP_IN:
1669 case SRE_OP_IN_IGNORE:
1670 GET_SKIP;
1671 /* Stop 1 before the end; we check the FAILURE below */
1672 if (!_validate_charset(code, code+skip-2))
1673 FAIL;
1674 if (code[skip-2] != SRE_OP_FAILURE)
1675 FAIL;
1676 code += skip-1;
1677 break;
1678
1679 case SRE_OP_INFO:
1680 {
1681 /* A minimal info field is
1682 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1683 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1684 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001685 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001686 SRE_CODE *newcode;
1687 GET_SKIP;
1688 newcode = code+skip-1;
1689 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001690 GET_ARG;
1691 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001692 /* Check that only valid flags are present */
1693 if ((flags & ~(SRE_INFO_PREFIX |
1694 SRE_INFO_LITERAL |
1695 SRE_INFO_CHARSET)) != 0)
1696 FAIL;
1697 /* PREFIX and CHARSET are mutually exclusive */
1698 if ((flags & SRE_INFO_PREFIX) &&
1699 (flags & SRE_INFO_CHARSET))
1700 FAIL;
1701 /* LITERAL implies PREFIX */
1702 if ((flags & SRE_INFO_LITERAL) &&
1703 !(flags & SRE_INFO_PREFIX))
1704 FAIL;
1705 /* Validate the prefix */
1706 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001707 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001708 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001709 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001710 /* Here comes the prefix string */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001711 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001712 FAIL;
1713 code += prefix_len;
1714 /* And here comes the overlap table */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001715 if (prefix_len > (Py_uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001716 FAIL;
1717 /* Each overlap value should be < prefix_len */
1718 for (i = 0; i < prefix_len; i++) {
1719 if (code[i] >= prefix_len)
1720 FAIL;
1721 }
1722 code += prefix_len;
1723 }
1724 /* Validate the charset */
1725 if (flags & SRE_INFO_CHARSET) {
1726 if (!_validate_charset(code, newcode-1))
1727 FAIL;
1728 if (newcode[-1] != SRE_OP_FAILURE)
1729 FAIL;
1730 code = newcode;
1731 }
1732 else if (code != newcode) {
1733 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1734 FAIL;
1735 }
1736 }
1737 break;
1738
1739 case SRE_OP_BRANCH:
1740 {
1741 SRE_CODE *target = NULL;
1742 for (;;) {
1743 GET_SKIP;
1744 if (skip == 0)
1745 break;
1746 /* Stop 2 before the end; we check the JUMP below */
1747 if (!_validate_inner(code, code+skip-3, groups))
1748 FAIL;
1749 code += skip-3;
1750 /* Check that it ends with a JUMP, and that each JUMP
1751 has the same target */
1752 GET_OP;
1753 if (op != SRE_OP_JUMP)
1754 FAIL;
1755 GET_SKIP;
1756 if (target == NULL)
1757 target = code+skip-1;
1758 else if (code+skip-1 != target)
1759 FAIL;
1760 }
1761 }
1762 break;
1763
1764 case SRE_OP_REPEAT_ONE:
1765 case SRE_OP_MIN_REPEAT_ONE:
1766 {
1767 SRE_CODE min, max;
1768 GET_SKIP;
1769 GET_ARG; min = arg;
1770 GET_ARG; max = arg;
1771 if (min > max)
1772 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001773 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001774 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001775 if (!_validate_inner(code, code+skip-4, groups))
1776 FAIL;
1777 code += skip-4;
1778 GET_OP;
1779 if (op != SRE_OP_SUCCESS)
1780 FAIL;
1781 }
1782 break;
1783
1784 case SRE_OP_REPEAT:
1785 {
1786 SRE_CODE min, max;
1787 GET_SKIP;
1788 GET_ARG; min = arg;
1789 GET_ARG; max = arg;
1790 if (min > max)
1791 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001792 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001793 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001794 if (!_validate_inner(code, code+skip-3, groups))
1795 FAIL;
1796 code += skip-3;
1797 GET_OP;
1798 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1799 FAIL;
1800 }
1801 break;
1802
1803 case SRE_OP_GROUPREF:
1804 case SRE_OP_GROUPREF_IGNORE:
1805 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001806 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001807 FAIL;
1808 break;
1809
1810 case SRE_OP_GROUPREF_EXISTS:
1811 /* The regex syntax for this is: '(?(group)then|else)', where
1812 'group' is either an integer group number or a group name,
1813 'then' and 'else' are sub-regexes, and 'else' is optional. */
1814 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001815 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001816 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001817 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001818 code--; /* The skip is relative to the first arg! */
1819 /* There are two possibilities here: if there is both a 'then'
1820 part and an 'else' part, the generated code looks like:
1821
1822 GROUPREF_EXISTS
1823 <group>
1824 <skipyes>
1825 ...then part...
1826 JUMP
1827 <skipno>
1828 (<skipyes> jumps here)
1829 ...else part...
1830 (<skipno> jumps here)
1831
1832 If there is only a 'then' part, it looks like:
1833
1834 GROUPREF_EXISTS
1835 <group>
1836 <skip>
1837 ...then part...
1838 (<skip> jumps here)
1839
1840 There is no direct way to decide which it is, and we don't want
1841 to allow arbitrary jumps anywhere in the code; so we just look
1842 for a JUMP opcode preceding our skip target.
1843 */
Victor Stinner1fa174a2013-08-28 02:06:21 +02001844 if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001845 code[skip-3] == SRE_OP_JUMP)
1846 {
1847 VTRACE(("both then and else parts present\n"));
1848 if (!_validate_inner(code+1, code+skip-3, groups))
1849 FAIL;
1850 code += skip-2; /* Position after JUMP, at <skipno> */
1851 GET_SKIP;
1852 if (!_validate_inner(code, code+skip-1, groups))
1853 FAIL;
1854 code += skip-1;
1855 }
1856 else {
1857 VTRACE(("only a then part present\n"));
1858 if (!_validate_inner(code+1, code+skip-1, groups))
1859 FAIL;
1860 code += skip-1;
1861 }
1862 break;
1863
1864 case SRE_OP_ASSERT:
1865 case SRE_OP_ASSERT_NOT:
1866 GET_SKIP;
1867 GET_ARG; /* 0 for lookahead, width for lookbehind */
1868 code--; /* Back up over arg to simplify math below */
1869 if (arg & 0x80000000)
1870 FAIL; /* Width too large */
1871 /* Stop 1 before the end; we check the SUCCESS below */
1872 if (!_validate_inner(code+1, code+skip-2, groups))
1873 FAIL;
1874 code += skip-2;
1875 GET_OP;
1876 if (op != SRE_OP_SUCCESS)
1877 FAIL;
1878 break;
1879
1880 default:
1881 FAIL;
1882
1883 }
1884 }
1885
1886 VTRACE(("okay\n"));
1887 return 1;
1888}
1889
1890static int
1891_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1892{
1893 if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
1894 FAIL;
1895 if (groups == 0) /* fix for simplejson */
1896 groups = 100; /* 100 groups should always be safe */
1897 return _validate_inner(code, end-1, groups);
1898}
1899
1900static int
1901_validate(PatternObject *self)
1902{
1903 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1904 {
1905 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1906 return 0;
1907 }
1908 else
1909 VTRACE(("Success!\n"));
1910 return 1;
1911}
1912
1913/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001914/* match methods */
1915
1916static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001917match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001918{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001919 Py_XDECREF(self->regs);
1920 Py_XDECREF(self->string);
1921 Py_DECREF(self->pattern);
1922 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001923}
1924
1925static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001926match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001927{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001928 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001929 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001930 Py_buffer view;
1931 PyObject *result;
1932 void* ptr;
1933
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001934 if (index < 0 || index >= self->groups) {
1935 /* raise IndexError if we were given a bad group number */
1936 PyErr_SetString(
1937 PyExc_IndexError,
1938 "no such group"
1939 );
1940 return NULL;
1941 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001942
Fredrik Lundh6f013982000-07-03 18:44:21 +00001943 index *= 2;
1944
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001945 if (self->string == Py_None || self->mark[index] < 0) {
1946 /* return default value if the string or group is undefined */
1947 Py_INCREF(def);
1948 return def;
1949 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001950
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001951 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001952 if (ptr == NULL)
1953 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001954 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001955 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001956 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001957 PyBuffer_Release(&view);
1958 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001959}
1960
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001961static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001962match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001963{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001964 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001965
Guido van Rossumddefaf32007-01-14 03:31:43 +00001966 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001967 /* Default value */
1968 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001969
Christian Heimes217cfd12007-12-02 14:31:20 +00001970 if (PyLong_Check(index))
1971 return PyLong_AsSsize_t(index);
Guido van Rossumb700df92000-03-31 14:59:30 +00001972
Fredrik Lundh6f013982000-07-03 18:44:21 +00001973 i = -1;
1974
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001975 if (self->pattern->groupindex) {
1976 index = PyObject_GetItem(self->pattern->groupindex, index);
1977 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00001978 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00001979 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001980 Py_DECREF(index);
1981 } else
1982 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001983 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001984
1985 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001986}
1987
1988static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001989match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001990{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001991 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001992}
1993
1994static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001995match_expand(MatchObject* self, PyObject* ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001996{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001997 /* delegate to Python code */
1998 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001999 SRE_PY_MODULE, "_expand",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002000 PyTuple_Pack(3, self->pattern, self, ptemplate)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002001 );
2002}
2003
2004static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002005match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002006{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002007 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002008 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002009
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002010 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002011
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002012 switch (size) {
2013 case 0:
2014 result = match_getslice(self, Py_False, Py_None);
2015 break;
2016 case 1:
2017 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2018 break;
2019 default:
2020 /* fetch multiple items */
2021 result = PyTuple_New(size);
2022 if (!result)
2023 return NULL;
2024 for (i = 0; i < size; i++) {
2025 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002026 self, PyTuple_GET_ITEM(args, i), Py_None
2027 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002028 if (!item) {
2029 Py_DECREF(result);
2030 return NULL;
2031 }
2032 PyTuple_SET_ITEM(result, i, item);
2033 }
2034 break;
2035 }
2036 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002037}
2038
2039static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002040match_groups(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002041{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002042 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002043 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002044
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002045 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002046 static char* kwlist[] = { "default", NULL };
Fredrik Lundh562586e2000-10-03 20:43:34 +00002047 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002048 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002049
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002050 result = PyTuple_New(self->groups-1);
2051 if (!result)
2052 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002053
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 for (index = 1; index < self->groups; index++) {
2055 PyObject* item;
2056 item = match_getslice_by_index(self, index, def);
2057 if (!item) {
2058 Py_DECREF(result);
2059 return NULL;
2060 }
2061 PyTuple_SET_ITEM(result, index-1, item);
2062 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002063
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002064 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002065}
2066
2067static PyObject*
Fredrik Lundh562586e2000-10-03 20:43:34 +00002068match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
Guido van Rossumb700df92000-03-31 14:59:30 +00002069{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002070 PyObject* result;
2071 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002072 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002073
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002074 PyObject* def = Py_None;
Martin v. Löwis15e62742006-02-27 16:46:16 +00002075 static char* kwlist[] = { "default", NULL };
Fredrik Lundh770617b2001-01-14 15:06:11 +00002076 if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002077 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002078
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002079 result = PyDict_New();
2080 if (!result || !self->pattern->groupindex)
2081 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002082
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002083 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002084 if (!keys)
2085 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002086
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002087 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002088 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002089 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002090 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002091 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002092 if (!key)
2093 goto failed;
2094 value = match_getslice(self, key, def);
2095 if (!value) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002096 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002097 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002098 }
Fredrik Lundh770617b2001-01-14 15:06:11 +00002099 status = PyDict_SetItem(result, key, value);
2100 Py_DECREF(value);
2101 if (status < 0)
2102 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002103 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002104
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002105 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002106
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002107 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002108
2109failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002110 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002111 Py_DECREF(result);
2112 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002113}
2114
2115static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002116match_start(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002117{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002118 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002119
Guido van Rossumddefaf32007-01-14 03:31:43 +00002120 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002121 if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002122 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002123
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002124 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002125
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002126 if (index < 0 || index >= self->groups) {
2127 PyErr_SetString(
2128 PyExc_IndexError,
2129 "no such group"
2130 );
2131 return NULL;
2132 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002133
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002134 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002135 return PyLong_FromSsize_t(self->mark[index*2]);
Guido van Rossumb700df92000-03-31 14:59:30 +00002136}
2137
2138static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002139match_end(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002140{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002141 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002142
Guido van Rossumddefaf32007-01-14 03:31:43 +00002143 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002144 if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002145 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002146
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002147 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002149 if (index < 0 || index >= self->groups) {
2150 PyErr_SetString(
2151 PyExc_IndexError,
2152 "no such group"
2153 );
2154 return NULL;
2155 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002156
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002157 /* mark is -1 if group is undefined */
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002158 return PyLong_FromSsize_t(self->mark[index*2+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002159}
2160
2161LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002162_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002163{
2164 PyObject* pair;
2165 PyObject* item;
2166
2167 pair = PyTuple_New(2);
2168 if (!pair)
2169 return NULL;
2170
Christian Heimes217cfd12007-12-02 14:31:20 +00002171 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002172 if (!item)
2173 goto error;
2174 PyTuple_SET_ITEM(pair, 0, item);
2175
Christian Heimes217cfd12007-12-02 14:31:20 +00002176 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002177 if (!item)
2178 goto error;
2179 PyTuple_SET_ITEM(pair, 1, item);
2180
2181 return pair;
2182
2183 error:
2184 Py_DECREF(pair);
2185 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002186}
2187
2188static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002189match_span(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002190{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002191 Py_ssize_t index;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002192
Guido van Rossumddefaf32007-01-14 03:31:43 +00002193 PyObject* index_ = NULL;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002194 if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002195 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002196
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002197 index = match_getindex(self, index_);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002198
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002199 if (index < 0 || index >= self->groups) {
2200 PyErr_SetString(
2201 PyExc_IndexError,
2202 "no such group"
2203 );
2204 return NULL;
2205 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002206
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002207 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002208 return _pair(self->mark[index*2], self->mark[index*2+1]);
2209}
2210
2211static PyObject*
2212match_regs(MatchObject* self)
2213{
2214 PyObject* regs;
2215 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002216 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002217
2218 regs = PyTuple_New(self->groups);
2219 if (!regs)
2220 return NULL;
2221
2222 for (index = 0; index < self->groups; index++) {
2223 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2224 if (!item) {
2225 Py_DECREF(regs);
2226 return NULL;
2227 }
2228 PyTuple_SET_ITEM(regs, index, item);
2229 }
2230
2231 Py_INCREF(regs);
2232 self->regs = regs;
2233
2234 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002235}
2236
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002237static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002238match_copy(MatchObject* self, PyObject *unused)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002239{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002240#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002241 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002242 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002243
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002244 slots = 2 * (self->pattern->groups+1);
2245
2246 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2247 if (!copy)
2248 return NULL;
2249
2250 /* this value a constant, but any compiler should be able to
2251 figure that out all by itself */
2252 offset = offsetof(MatchObject, string);
2253
2254 Py_XINCREF(self->pattern);
2255 Py_XINCREF(self->string);
2256 Py_XINCREF(self->regs);
2257
2258 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002259 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002260
2261 return (PyObject*) copy;
2262#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002263 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002264 return NULL;
2265#endif
2266}
2267
2268static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002269match_deepcopy(MatchObject* self, PyObject* memo)
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002270{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002271#ifdef USE_BUILTIN_COPY
2272 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002273
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002274 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002275 if (!copy)
2276 return NULL;
2277
2278 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2279 !deepcopy(&copy->string, memo) ||
2280 !deepcopy(&copy->regs, memo)) {
2281 Py_DECREF(copy);
2282 return NULL;
2283 }
2284
2285#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002286 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2287 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002288#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002289}
2290
/* Docstrings for match objects: match_doc is used as tp_doc of
   Match_Type, and the match_*_doc strings are attached to the
   corresponding entries of the match_methods table. */
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002291PyDoc_STRVAR(match_doc,
2292"The result of re.match() and re.search().\n\
2293Match objects always have a boolean value of True.");
2294
2295PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002296"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002297 Return subgroup(s) of the match by indices or names.\n\
2298 For 0 returns the entire match.");
2299
2300PyDoc_STRVAR(match_start_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002301"start([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002302 Return index of the start of the substring matched by group.");
2303
2304PyDoc_STRVAR(match_end_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002305"end([group=0]) -> int.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002306 Return index of the end of the substring matched by group.");
2307
2308PyDoc_STRVAR(match_span_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002309"span([group]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002310 For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
2311
2312PyDoc_STRVAR(match_groups_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002313"groups([default=None]) -> tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002314 Return a tuple containing all the subgroups of the match, from 1.\n\
2315 The default argument is used for groups\n\
2316 that did not participate in the match");
2317
2318PyDoc_STRVAR(match_groupdict_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002319"groupdict([default=None]) -> dict.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002320 Return a dictionary containing all the named subgroups of the match,\n\
2321 keyed by the subgroup name. The default argument is used for groups\n\
2322 that did not participate in the match");
2323
2324PyDoc_STRVAR(match_expand_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002325"expand(template) -> str.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002326 Return the string obtained by doing backslash substitution\n\
2327 on the string template, as done by the sub() method.");
2328
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002329static PyMethodDef match_methods[] = {
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002330 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2331 {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
2332 {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
2333 {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
2334 {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
2335 match_groups_doc},
2336 {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
2337 match_groupdict_doc},
2338 {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002339 {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
2340 {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002341 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002342};
2343
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002344static PyObject *
2345match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002346{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002347 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002348 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002349 Py_INCREF(Py_None);
2350 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002351}
2352
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002353static PyObject *
2354match_lastgroup_get(MatchObject *self)
2355{
2356 if (self->pattern->indexgroup && self->lastindex >= 0) {
2357 PyObject* result = PySequence_GetItem(
2358 self->pattern->indexgroup, self->lastindex
2359 );
2360 if (result)
2361 return result;
2362 PyErr_Clear();
2363 }
2364 Py_INCREF(Py_None);
2365 return Py_None;
2366}
2367
2368static PyObject *
2369match_regs_get(MatchObject *self)
2370{
2371 if (self->regs) {
2372 Py_INCREF(self->regs);
2373 return self->regs;
2374 } else
2375 return match_regs(self);
2376}
2377
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002378static PyObject *
2379match_repr(MatchObject *self)
2380{
2381 PyObject *result;
2382 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2383 if (group0 == NULL)
2384 return NULL;
2385 result = PyUnicode_FromFormat(
2386 "<%s object; span=(%d, %d), match=%.50R>",
2387 Py_TYPE(self)->tp_name,
2388 self->mark[0], self->mark[1], group0);
2389 Py_DECREF(group0);
2390 return result;
2391}
2392
2393
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002394static PyGetSetDef match_getset[] = {
2395 {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2396 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2397 {"regs", (getter)match_regs_get, (setter)NULL},
2398 {NULL}
2399};
2400
2401#define MATCH_OFF(x) offsetof(MatchObject, x)
2402static PyMemberDef match_members[] = {
2403 {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2404 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2405 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2406 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2407 {NULL}
2408};
2409
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2412
Neal Norwitz57c179c2006-03-22 07:18:02 +00002413static PyTypeObject Match_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002414 PyVarObject_HEAD_INIT(NULL,0)
2415 "_" SRE_MODULE ".SRE_Match",
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002416 sizeof(MatchObject), sizeof(Py_ssize_t),
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002417 (destructor)match_dealloc, /* tp_dealloc */
2418 0, /* tp_print */
2419 0, /* tp_getattr */
2420 0, /* tp_setattr */
2421 0, /* tp_reserved */
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002422 (reprfunc)match_repr, /* tp_repr */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002423 0, /* tp_as_number */
2424 0, /* tp_as_sequence */
2425 0, /* tp_as_mapping */
2426 0, /* tp_hash */
2427 0, /* tp_call */
2428 0, /* tp_str */
2429 0, /* tp_getattro */
2430 0, /* tp_setattro */
2431 0, /* tp_as_buffer */
2432 Py_TPFLAGS_DEFAULT, /* tp_flags */
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002433 match_doc, /* tp_doc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002434 0, /* tp_traverse */
2435 0, /* tp_clear */
2436 0, /* tp_richcompare */
2437 0, /* tp_weaklistoffset */
2438 0, /* tp_iter */
2439 0, /* tp_iternext */
2440 match_methods, /* tp_methods */
2441 match_members, /* tp_members */
2442 match_getset, /* tp_getset */
Guido van Rossumb700df92000-03-31 14:59:30 +00002443};
2444
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002445static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002446pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002447{
2448 /* create match object (from state object) */
2449
2450 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002451 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002452 char* base;
2453 int n;
2454
2455 if (status > 0) {
2456
2457 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002458 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002459 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2460 2*(pattern->groups+1));
2461 if (!match)
2462 return NULL;
2463
2464 Py_INCREF(pattern);
2465 match->pattern = pattern;
2466
2467 Py_INCREF(state->string);
2468 match->string = state->string;
2469
2470 match->regs = NULL;
2471 match->groups = pattern->groups+1;
2472
2473 /* fill in group slices */
2474
2475 base = (char*) state->beginning;
2476 n = state->charsize;
2477
2478 match->mark[0] = ((char*) state->start - base) / n;
2479 match->mark[1] = ((char*) state->ptr - base) / n;
2480
2481 for (i = j = 0; i < pattern->groups; i++, j+=2)
2482 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2483 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2484 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2485 } else
2486 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2487
2488 match->pos = state->pos;
2489 match->endpos = state->endpos;
2490
2491 match->lastindex = state->lastindex;
2492
2493 return (PyObject*) match;
2494
2495 } else if (status == 0) {
2496
2497 /* no match */
2498 Py_INCREF(Py_None);
2499 return Py_None;
2500
2501 }
2502
2503 /* internal error */
2504 pattern_error(status);
2505 return NULL;
2506}
2507
2508
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002509/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002510/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002511
2512static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002513scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002514{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002515 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002516 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002517 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002518}
2519
2520static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002521scanner_match(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002522{
2523 SRE_STATE* state = &self->state;
2524 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002525 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002526
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002527 state_reset(state);
2528
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002529 state->ptr = state->start;
2530
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002531 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002532 if (PyErr_Occurred())
2533 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002534
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002535 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002536 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002537
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002538 if (status == 0 || state->ptr == state->start)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002539 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002540 else
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002541 state->start = state->ptr;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002542
2543 return match;
2544}
2545
2546
2547static PyObject*
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002548scanner_search(ScannerObject* self, PyObject *unused)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002549{
2550 SRE_STATE* state = &self->state;
2551 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002552 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002553
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002554 state_reset(state);
2555
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002556 state->ptr = state->start;
2557
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002558 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002559 if (PyErr_Occurred())
2560 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002561
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002562 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002563 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002564
Gustavo Niemeyer0506c642004-09-03 18:11:59 +00002565 if (status == 0 || state->ptr == state->start)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002566 state->start = (void*) ((char*) state->ptr + state->charsize);
2567 else
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002568 state->start = state->ptr;
2569
2570 return match;
2571}
2572
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002573static PyMethodDef scanner_methods[] = {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002574 {"match", (PyCFunction) scanner_match, METH_NOARGS},
2575 {"search", (PyCFunction) scanner_search, METH_NOARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002576 {NULL, NULL}
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002577};
2578
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002579#define SCAN_OFF(x) offsetof(ScannerObject, x)
2580static PyMemberDef scanner_members[] = {
Ezio Melotti7c8c1ea2011-09-29 01:00:19 +03002581 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002582 {NULL} /* Sentinel */
2583};
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002584
Neal Norwitz57c179c2006-03-22 07:18:02 +00002585static PyTypeObject Scanner_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002586 PyVarObject_HEAD_INIT(NULL, 0)
2587 "_" SRE_MODULE ".SRE_Scanner",
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002588 sizeof(ScannerObject), 0,
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002589 (destructor)scanner_dealloc,/* tp_dealloc */
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002590 0, /* tp_print */
2591 0, /* tp_getattr */
2592 0, /* tp_setattr */
2593 0, /* tp_reserved */
2594 0, /* tp_repr */
2595 0, /* tp_as_number */
2596 0, /* tp_as_sequence */
2597 0, /* tp_as_mapping */
2598 0, /* tp_hash */
2599 0, /* tp_call */
2600 0, /* tp_str */
2601 0, /* tp_getattro */
2602 0, /* tp_setattro */
2603 0, /* tp_as_buffer */
2604 Py_TPFLAGS_DEFAULT, /* tp_flags */
2605 0, /* tp_doc */
2606 0, /* tp_traverse */
2607 0, /* tp_clear */
2608 0, /* tp_richcompare */
2609 0, /* tp_weaklistoffset */
2610 0, /* tp_iter */
2611 0, /* tp_iternext */
2612 scanner_methods, /* tp_methods */
2613 scanner_members, /* tp_members */
2614 0, /* tp_getset */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002615};
2616
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002617static PyObject*
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002618pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002619{
2620 /* create search state object */
2621
2622 ScannerObject* self;
2623
2624 PyObject* string;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002625 Py_ssize_t start = 0;
2626 Py_ssize_t end = PY_SSIZE_T_MAX;
Sean Reifschneider7b3c9752012-03-12 18:22:38 -06002627 static char* kwlist[] = { "source", "pos", "endpos", NULL };
2628 if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:scanner", kwlist,
2629 &string, &start, &end))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002630 return NULL;
2631
2632 /* create scanner object */
2633 self = PyObject_NEW(ScannerObject, &Scanner_Type);
2634 if (!self)
2635 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002636 self->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002637
2638 string = state_init(&self->state, pattern, string, start, end);
2639 if (!string) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002640 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002641 return NULL;
2642 }
2643
2644 Py_INCREF(pattern);
2645 self->pattern = (PyObject*) pattern;
2646
2647 return (PyObject*) self;
2648}
2649
Guido van Rossumb700df92000-03-31 14:59:30 +00002650static PyMethodDef _functions[] = {
Neal Norwitzb0493252002-03-31 14:44:22 +00002651 {"compile", _compile, METH_VARARGS},
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002652 {"getcodesize", sre_codesize, METH_NOARGS},
Neal Norwitzb0493252002-03-31 14:44:22 +00002653 {"getlower", sre_getlower, METH_VARARGS},
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002654 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002655};
2656
Martin v. Löwis1a214512008-06-11 05:26:20 +00002657static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002658 PyModuleDef_HEAD_INIT,
2659 "_" SRE_MODULE,
2660 NULL,
2661 -1,
2662 _functions,
2663 NULL,
2664 NULL,
2665 NULL,
2666 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002667};
2668
2669PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002670{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002671 PyObject* m;
2672 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002673 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002674
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002675 /* Patch object types */
2676 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2677 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002678 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002679
Martin v. Löwis1a214512008-06-11 05:26:20 +00002680 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002681 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002682 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002683 d = PyModule_GetDict(m);
2684
Christian Heimes217cfd12007-12-02 14:31:20 +00002685 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002686 if (x) {
2687 PyDict_SetItemString(d, "MAGIC", x);
2688 Py_DECREF(x);
2689 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002690
Christian Heimes217cfd12007-12-02 14:31:20 +00002691 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002692 if (x) {
2693 PyDict_SetItemString(d, "CODESIZE", x);
2694 Py_DECREF(x);
2695 }
2696
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002697 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2698 if (x) {
2699 PyDict_SetItemString(d, "MAXREPEAT", x);
2700 Py_DECREF(x);
2701 }
2702
Neal Norwitzfe537132007-08-26 03:55:15 +00002703 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002704 if (x) {
2705 PyDict_SetItemString(d, "copyright", x);
2706 Py_DECREF(x);
2707 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002708 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002709}
2710
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002711/* vim:ts=4:sw=4:et
2712*/