blob: 69c7bc0de6962e72c062fed379e48dc625e13d33 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* version and copyright banner for the SRE engine */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh29c08be2000-06-29 23:33:12 +000063/* optional features */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000064
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000065/* enables copy/deepcopy handling (work in progress) */
66#undef USE_BUILTIN_COPY
67
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000068/* -------------------------------------------------------------------- */
69
Fredrik Lundh80946112000-06-29 18:03:25 +000070#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000071#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000072#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000073/* fastest possible local call under MSVC */
74#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000075#elif defined(USE_INLINE)
Fredrik Lundh29c08be2000-06-29 23:33:12 +000076#define LOCAL(type) static inline type
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000077#else
78#define LOCAL(type) static type
Guido van Rossumb700df92000-03-31 14:59:30 +000079#endif
80
/* error codes (all negative; passed to pattern_error() to raise the
   matching Python exception) */
#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
#define SRE_ERROR_STATE -2 /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000087
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000088#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000089#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000090#else
91#define TRACE(v)
92#endif
93
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000094/* -------------------------------------------------------------------- */
95/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000096
/* ASCII-only character predicates: anything >= 128 never matches.
   Note that each macro evaluates `ch` more than once. */
#define SRE_IS_DIGIT(ch)\
    ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
    ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)\
    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +0000107
/* ASCII-only lower-casing: values >= 128 are returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}
112
/* ASCII-only upper-casing: values >= 128 are returned unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
}
117
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1 */
/* values outside 0..255 are never handed to isalnum() and yield 0 */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
123
/* Locale-aware lower-casing via tolower() for values below 256; larger
   values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
128
/* Locale-aware upper-casing via toupper() for values below 256; larger
   values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
}
133
/* unicode-specific character predicates */

/* thin wrappers over the Py_UNICODE_* classification macros; "word"
   additionally accepts the underscore */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000141
/* Unicode lower-casing, delegated to Py_UNICODE_TOLOWER(). */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
146
/* Unicode upper-casing, delegated to Py_UNICODE_TOUPPER(). */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOUPPER(ch);
}
151
Guido van Rossumb700df92000-03-31 14:59:30 +0000152LOCAL(int)
153sre_category(SRE_CODE category, unsigned int ch)
154{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000155 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000156
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000157 case SRE_CATEGORY_DIGIT:
158 return SRE_IS_DIGIT(ch);
159 case SRE_CATEGORY_NOT_DIGIT:
160 return !SRE_IS_DIGIT(ch);
161 case SRE_CATEGORY_SPACE:
162 return SRE_IS_SPACE(ch);
163 case SRE_CATEGORY_NOT_SPACE:
164 return !SRE_IS_SPACE(ch);
165 case SRE_CATEGORY_WORD:
166 return SRE_IS_WORD(ch);
167 case SRE_CATEGORY_NOT_WORD:
168 return !SRE_IS_WORD(ch);
169 case SRE_CATEGORY_LINEBREAK:
170 return SRE_IS_LINEBREAK(ch);
171 case SRE_CATEGORY_NOT_LINEBREAK:
172 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000173
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000174 case SRE_CATEGORY_LOC_WORD:
175 return SRE_LOC_IS_WORD(ch);
176 case SRE_CATEGORY_LOC_NOT_WORD:
177 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000178
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000179 case SRE_CATEGORY_UNI_DIGIT:
180 return SRE_UNI_IS_DIGIT(ch);
181 case SRE_CATEGORY_UNI_NOT_DIGIT:
182 return !SRE_UNI_IS_DIGIT(ch);
183 case SRE_CATEGORY_UNI_SPACE:
184 return SRE_UNI_IS_SPACE(ch);
185 case SRE_CATEGORY_UNI_NOT_SPACE:
186 return !SRE_UNI_IS_SPACE(ch);
187 case SRE_CATEGORY_UNI_WORD:
188 return SRE_UNI_IS_WORD(ch);
189 case SRE_CATEGORY_UNI_NOT_WORD:
190 return !SRE_UNI_IS_WORD(ch);
191 case SRE_CATEGORY_UNI_LINEBREAK:
192 return SRE_UNI_IS_LINEBREAK(ch);
193 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
194 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000195 }
196 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000197}
198
199/* helpers */
200
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000201static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000204 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000205 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000206 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000207 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000208 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000209}
210
211static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000212data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000213{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000214 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000215 minsize = state->data_stack_base+size;
216 cursize = state->data_stack_size;
217 if (cursize < minsize) {
218 void* stack;
219 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300220 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000222 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000223 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000224 return SRE_ERROR_MEMORY;
225 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000226 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000227 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000228 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000230}
231
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000232/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000233
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300234#define SRE_CHAR Py_UCS1
235#define SIZEOF_SRE_CHAR 1
236#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300237#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000238
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300239/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000240
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300241#define SRE_CHAR Py_UCS2
242#define SIZEOF_SRE_CHAR 2
243#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300244#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000245
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300246/* generate 32-bit unicode version */
247
248#define SRE_CHAR Py_UCS4
249#define SIZEOF_SRE_CHAR 4
250#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300251#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000252
253/* -------------------------------------------------------------------- */
254/* factories and destructors */
255
256/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100257static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300258static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300260
261/*[clinic input]
262module _sre
263class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
264class _sre.SRE_Match "MatchObject *" "&Match_Type"
265class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
266[clinic start generated code]*/
267/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
268
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700269static PyTypeObject Pattern_Type;
270static PyTypeObject Match_Type;
271static PyTypeObject Scanner_Type;
272
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300273/*[clinic input]
274_sre.getcodesize -> int
275[clinic start generated code]*/
276
277static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300278_sre_getcodesize_impl(PyObject *module)
279/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000280{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300281 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000282}
283
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300284/*[clinic input]
285_sre.getlower -> int
286
287 character: int
288 flags: int
289 /
290
291[clinic start generated code]*/
292
293static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300294_sre_getlower_impl(PyObject *module, int character, int flags)
295/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000296{
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000297 if (flags & SRE_FLAG_LOCALE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300298 return sre_lower_locale(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000299 if (flags & SRE_FLAG_UNICODE)
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300300 return sre_lower_unicode(character);
301 return sre_lower(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000302}
303
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000304LOCAL(void)
305state_reset(SRE_STATE* state)
306{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000307 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000308 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000309
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000310 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000311 state->lastindex = -1;
312
313 state->repeat = NULL;
314
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000315 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000316}
317
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000318static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300320 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600321 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000322{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000323 /* given a python object, return a data pointer, a length (in
324 characters), and a character size. return NULL if the object
325 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000326
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000327 /* Unicode objects do not support the buffer API. So, get the data
328 directly instead. */
329 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 if (PyUnicode_READY(string) == -1)
331 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200333 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300334 *p_isbytes = 0;
335 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000336 }
337
Victor Stinner0058b862011-09-29 03:27:47 +0200338 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300339 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200340 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300341 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000342 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000343
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300344 *p_length = view->len;
345 *p_charsize = 1;
346 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000347
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300348 if (view->buf == NULL) {
349 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
350 PyBuffer_Release(view);
351 view->buf = NULL;
352 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000353 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300354 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000355}
356
357LOCAL(PyObject*)
358state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000359 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000360{
361 /* prepare state object */
362
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000363 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300364 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000365 void* ptr;
366
367 memset(state, 0, sizeof(SRE_STATE));
368
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300369 state->mark = PyMem_New(void *, pattern->groups * 2);
370 if (!state->mark) {
371 PyErr_NoMemory();
372 goto err;
373 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000374 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000375 state->lastindex = -1;
376
Benjamin Petersone48944b2012-03-07 14:50:25 -0600377 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300378 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000379 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600380 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000381
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600383 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200384 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600385 goto err;
386 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600388 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200389 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600390 goto err;
391 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000392
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000393 /* adjust boundaries */
394 if (start < 0)
395 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396 else if (start > length)
397 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000398
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000399 if (end < 0)
400 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 else if (end > length)
402 end = length;
403
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300404 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000405 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000406
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000407 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000408
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000409 state->start = (void*) ((char*) ptr + start * state->charsize);
410 state->end = (void*) ((char*) ptr + end * state->charsize);
411
412 Py_INCREF(string);
413 state->string = string;
414 state->pos = start;
415 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000416
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200417 if (pattern->flags & SRE_FLAG_LOCALE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000418 state->lower = sre_lower_locale;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200419 state->upper = sre_upper_locale;
420 }
421 else if (pattern->flags & SRE_FLAG_UNICODE) {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000422 state->lower = sre_lower_unicode;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200423 state->upper = sre_upper_unicode;
424 }
425 else {
Fredrik Lundhb389df32000-06-29 12:48:37 +0000426 state->lower = sre_lower;
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +0200427 state->upper = sre_upper;
428 }
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000429
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000430 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300432 PyMem_Del(state->mark);
433 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600434 if (state->buffer.buf)
435 PyBuffer_Release(&state->buffer);
436 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000437}
438
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000439LOCAL(void)
440state_fini(SRE_STATE* state)
441{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600442 if (state->buffer.buf)
443 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000444 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000445 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300446 PyMem_Del(state->mark);
447 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000448}
449
/* calculate offset from start of string */
/* converts the pointer `member` into a character index by dividing its
   byte distance from (state)->beginning by (state)->charsize */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
453
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000454LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300455getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300456 PyObject* string, Py_ssize_t start, Py_ssize_t end)
457{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300458 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300459 if (PyBytes_CheckExact(string) &&
460 start == 0 && end == PyBytes_GET_SIZE(string)) {
461 Py_INCREF(string);
462 return string;
463 }
464 return PyBytes_FromStringAndSize(
465 (const char *)ptr + start, end - start);
466 }
467 else {
468 return PyUnicode_Substring(string, start, end);
469 }
470}
471
472LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000474{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000475 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000476
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000477 index = (index - 1) * 2;
478
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000479 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000480 if (empty)
481 /* want empty string */
482 i = j = 0;
483 else {
484 Py_INCREF(Py_None);
485 return Py_None;
486 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000487 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000488 i = STATE_OFFSET(state, state->mark[index]);
489 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000490 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000491
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300492 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000493}
494
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000495static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100496pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000497{
498 switch (status) {
499 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400500 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000501 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400502 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000503 "maximum recursion limit exceeded"
504 );
505 break;
506 case SRE_ERROR_MEMORY:
507 PyErr_NoMemory();
508 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000509 case SRE_ERROR_INTERRUPTED:
510 /* An exception has already been raised, so let it fly */
511 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000512 default:
513 /* other error codes indicate compiler/engine bugs */
514 PyErr_SetString(
515 PyExc_RuntimeError,
516 "internal error in regular expression engine"
517 );
518 }
519}
520
Guido van Rossumb700df92000-03-31 14:59:30 +0000521static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000523{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000524 if (self->weakreflist != NULL)
525 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000526 Py_XDECREF(self->pattern);
527 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000528 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000529 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000530}
531
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300532LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300533sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300534{
535 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300536 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300537 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300538 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300539 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300540 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300541}
542
543LOCAL(Py_ssize_t)
544sre_search(SRE_STATE* state, SRE_CODE* pattern)
545{
546 if (state->charsize == 1)
547 return sre_ucs1_search(state, pattern);
548 if (state->charsize == 2)
549 return sre_ucs2_search(state, pattern);
550 assert(state->charsize == 4);
551 return sre_ucs4_search(state, pattern);
552}
553
Larry Hastings16c51912014-01-07 11:53:01 -0800554static PyObject *
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200555fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
556{
557 if (string2 != NULL) {
558 if (string != NULL) {
559 PyErr_Format(PyExc_TypeError,
560 "Argument given by name ('%s') and position (1)",
561 oldname);
562 return NULL;
563 }
564 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
565 "The '%s' keyword parameter name is deprecated. "
566 "Use 'string' instead.", oldname) < 0)
567 return NULL;
568 return string2;
569 }
570 if (string == NULL) {
571 PyErr_SetString(PyExc_TypeError,
572 "Required argument 'string' (pos 1) not found");
573 return NULL;
574 }
575 return string;
576}
Larry Hastings16c51912014-01-07 11:53:01 -0800577
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300578/*[clinic input]
579_sre.SRE_Pattern.match
580
581 string: object = NULL
582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
584 *
585 pattern: object = NULL
586
587Matches zero or more characters at the beginning of the string.
588[clinic start generated code]*/
589
Larry Hastings16c51912014-01-07 11:53:01 -0800590static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300591_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
592 Py_ssize_t pos, Py_ssize_t endpos,
593 PyObject *pattern)
594/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800595{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000596 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100597 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300598 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000599
Serhiy Storchakaa537eb42014-03-06 11:36:15 +0200600 string = fix_string_param(string, pattern, "pattern");
601 if (!string)
602 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300603 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000604 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000606 state.ptr = state.start;
607
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000608 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
609
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300610 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000611
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000612 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300613 if (PyErr_Occurred()) {
614 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000615 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300616 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000617
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300618 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000619 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300620 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000621}
622
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300623/*[clinic input]
624_sre.SRE_Pattern.fullmatch
625
626 string: object = NULL
627 pos: Py_ssize_t = 0
628 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
629 *
630 pattern: object = NULL
631
632Matches against all of the string
633[clinic start generated code]*/
634
635static PyObject *
636_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
637 Py_ssize_t pos, Py_ssize_t endpos,
638 PyObject *pattern)
639/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200640{
641 SRE_STATE state;
642 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300643 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300645 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200646 if (!string)
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200647 return NULL;
648
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300649 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200650 return NULL;
651
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200652 state.ptr = state.start;
653
654 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
655
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300656 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200657
658 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300659 if (PyErr_Occurred()) {
660 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200661 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300662 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200663
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300664 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200665 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300666 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200667}
668
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300669/*[clinic input]
670_sre.SRE_Pattern.search
671
672 string: object = NULL
673 pos: Py_ssize_t = 0
674 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
675 *
676 pattern: object = NULL
677
678Scan through string looking for a match, and return a corresponding match object instance.
679
680Return None if no position in the string matches.
681[clinic start generated code]*/
682
683static PyObject *
684_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
685 Py_ssize_t pos, Py_ssize_t endpos,
686 PyObject *pattern)
687/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000688{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000689 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100690 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300691 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000692
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300693 string = fix_string_param(string, pattern, "pattern");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200694 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000695 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000696
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300697 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000698 return NULL;
699
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000700 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
701
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300702 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000703
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000704 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
705
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300706 if (PyErr_Occurred()) {
707 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000708 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300709 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000710
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300711 match = pattern_new_match(self, &state, status);
712 state_fini(&state);
713 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000714}
715
716static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200717call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000718{
719 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000720 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000721 PyObject* func;
722 PyObject* result;
723
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000724 if (!args)
725 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000726 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000727 if (!name)
728 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000729 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000730 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000731 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000732 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000733 func = PyObject_GetAttrString(mod, function);
734 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000735 if (!func)
736 return NULL;
737 result = PyObject_CallObject(func, args);
738 Py_DECREF(func);
739 Py_DECREF(args);
740 return result;
741}
742
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000743#ifdef USE_BUILTIN_COPY
744static int
745deepcopy(PyObject** object, PyObject* memo)
746{
747 PyObject* copy;
748
749 copy = call(
750 "copy", "deepcopy",
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000751 PyTuple_Pack(2, *object, memo)
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000752 );
753 if (!copy)
754 return 0;
755
Serhiy Storchaka57a01d32016-04-10 18:05:40 +0300756 Py_SETREF(*object, copy);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000757
758 return 1; /* success */
759}
760#endif
761
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300762/*[clinic input]
763_sre.SRE_Pattern.findall
764
765 string: object = NULL
766 pos: Py_ssize_t = 0
767 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
768 *
769 source: object = NULL
770
771Return a list of all non-overlapping matches of pattern in string.
772[clinic start generated code]*/
773
774static PyObject *
775_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
776 Py_ssize_t pos, Py_ssize_t endpos,
777 PyObject *source)
778/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000779{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000780 SRE_STATE state;
781 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100782 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000783 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000784
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300785 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200786 if (!string)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000787 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000788
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300789 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000790 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000791
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000792 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000793 if (!list) {
794 state_fini(&state);
795 return NULL;
796 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000797
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000798 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000799
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000800 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000801
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000802 state_reset(&state);
803
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000804 state.ptr = state.start;
805
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300806 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300807 if (PyErr_Occurred())
808 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000809
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000810 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000811 if (status == 0)
812 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000813 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000814 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000815 }
Tim Peters3d563502006-01-21 02:47:53 +0000816
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000817 /* don't bother to build a match object */
818 switch (self->groups) {
819 case 0:
820 b = STATE_OFFSET(&state, state.start);
821 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300822 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300823 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000824 if (!item)
825 goto error;
826 break;
827 case 1:
828 item = state_getslice(&state, 1, string, 1);
829 if (!item)
830 goto error;
831 break;
832 default:
833 item = PyTuple_New(self->groups);
834 if (!item)
835 goto error;
836 for (i = 0; i < self->groups; i++) {
837 PyObject* o = state_getslice(&state, i+1, string, 1);
838 if (!o) {
839 Py_DECREF(item);
840 goto error;
841 }
842 PyTuple_SET_ITEM(item, i, o);
843 }
844 break;
845 }
846
847 status = PyList_Append(list, item);
848 Py_DECREF(item);
849 if (status < 0)
850 goto error;
851
852 if (state.ptr == state.start)
853 state.start = (void*) ((char*) state.ptr + state.charsize);
854 else
855 state.start = state.ptr;
856
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000857 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000858
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000859 state_fini(&state);
860 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000861
862error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000863 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000864 state_fini(&state);
865 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000866
Guido van Rossumb700df92000-03-31 14:59:30 +0000867}
868
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300869/*[clinic input]
870_sre.SRE_Pattern.finditer
871
872 string: object
873 pos: Py_ssize_t = 0
874 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
875
876Return an iterator over all non-overlapping matches for the RE pattern in string.
877
878For each match, the iterator returns a match object.
879[clinic start generated code]*/
880
881static PyObject *
882_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
883 Py_ssize_t pos, Py_ssize_t endpos)
884/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000885{
886 PyObject* scanner;
887 PyObject* search;
888 PyObject* iterator;
889
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300890 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000891 if (!scanner)
892 return NULL;
893
894 search = PyObject_GetAttrString(scanner, "search");
895 Py_DECREF(scanner);
896 if (!search)
897 return NULL;
898
899 iterator = PyCallIter_New(search, Py_None);
900 Py_DECREF(search);
901
902 return iterator;
903}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000904
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300905/*[clinic input]
906_sre.SRE_Pattern.scanner
907
908 string: object
909 pos: Py_ssize_t = 0
910 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
911
912[clinic start generated code]*/
913
914static PyObject *
915_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
916 Py_ssize_t pos, Py_ssize_t endpos)
917/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
918{
919 return pattern_scanner(self, string, pos, endpos);
920}
921
922/*[clinic input]
923_sre.SRE_Pattern.split
924
925 string: object = NULL
926 maxsplit: Py_ssize_t = 0
927 *
928 source: object = NULL
929
930Split string by the occurrences of pattern.
931[clinic start generated code]*/
932
933static PyObject *
934_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
935 Py_ssize_t maxsplit, PyObject *source)
936/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000937{
938 SRE_STATE state;
939 PyObject* list;
940 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100941 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000942 Py_ssize_t n;
943 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000944 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000945
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300946 string = fix_string_param(string, source, "source");
Serhiy Storchakaccdf3522014-03-06 11:28:32 +0200947 if (!string)
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948 return NULL;
949
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200950 assert(self->codesize != 0);
951 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
952 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
953 PyErr_SetString(PyExc_ValueError,
954 "split() requires a non-empty pattern match.");
955 return NULL;
956 }
957 if (PyErr_WarnEx(PyExc_FutureWarning,
958 "split() requires a non-empty pattern match.",
959 1) < 0)
960 return NULL;
961 }
962
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300963 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000964 return NULL;
965
966 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000967 if (!list) {
968 state_fini(&state);
969 return NULL;
970 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000971
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000972 n = 0;
973 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000974
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000975 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000976
977 state_reset(&state);
978
979 state.ptr = state.start;
980
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300981 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300982 if (PyErr_Occurred())
983 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000984
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000985 if (status <= 0) {
986 if (status == 0)
987 break;
988 pattern_error(status);
989 goto error;
990 }
Tim Peters3d563502006-01-21 02:47:53 +0000991
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300993 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000994 break;
995 /* skip one character */
996 state.start = (void*) ((char*) state.ptr + state.charsize);
997 continue;
998 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000999
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001000 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001001 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001002 string, STATE_OFFSET(&state, last),
1003 STATE_OFFSET(&state, state.start)
1004 );
1005 if (!item)
1006 goto error;
1007 status = PyList_Append(list, item);
1008 Py_DECREF(item);
1009 if (status < 0)
1010 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001011
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001012 /* add groups (if any) */
1013 for (i = 0; i < self->groups; i++) {
1014 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001015 if (!item)
1016 goto error;
1017 status = PyList_Append(list, item);
1018 Py_DECREF(item);
1019 if (status < 0)
1020 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001021 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001022
1023 n = n + 1;
1024
1025 last = state.start = state.ptr;
1026
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001027 }
1028
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001029 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001030 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +00001031 string, STATE_OFFSET(&state, last), state.endpos
1032 );
1033 if (!item)
1034 goto error;
1035 status = PyList_Append(list, item);
1036 Py_DECREF(item);
1037 if (status < 0)
1038 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001039
1040 state_fini(&state);
1041 return list;
1042
1043error:
1044 Py_DECREF(list);
1045 state_fini(&state);
1046 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001047
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001048}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001049
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001050static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001051pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001052 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001053{
1054 SRE_STATE state;
1055 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001056 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001057 PyObject* item;
1058 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001059 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001060 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001061 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001062 Py_ssize_t n;
1063 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001064 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001065 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001066 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001067
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001068 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001069 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001070 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001071 Py_INCREF(filter);
1072 filter_is_callable = 1;
1073 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001074 /* if not callable, check if it's a literal string */
1075 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001076 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001077 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001078 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001079 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001080 if (charsize == 1)
1081 literal = memchr(ptr, '\\', n) == NULL;
1082 else
1083 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001084 } else {
1085 PyErr_Clear();
1086 literal = 0;
1087 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001088 if (view.buf)
1089 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001090 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001091 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001092 Py_INCREF(filter);
1093 filter_is_callable = 0;
1094 } else {
1095 /* not a literal; hand it over to the template compiler */
1096 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001097 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001098 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001099 );
1100 if (!filter)
1101 return NULL;
1102 filter_is_callable = PyCallable_Check(filter);
1103 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001104 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001105
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001106 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001107 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001108 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001109 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110
1111 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001112 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001113 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001114 state_fini(&state);
1115 return NULL;
1116 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001117
1118 n = i = 0;
1119
1120 while (!count || n < count) {
1121
1122 state_reset(&state);
1123
1124 state.ptr = state.start;
1125
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001126 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001127 if (PyErr_Occurred())
1128 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001129
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001130 if (status <= 0) {
1131 if (status == 0)
1132 break;
1133 pattern_error(status);
1134 goto error;
1135 }
Tim Peters3d563502006-01-21 02:47:53 +00001136
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001137 b = STATE_OFFSET(&state, state.start);
1138 e = STATE_OFFSET(&state, state.ptr);
1139
1140 if (i < b) {
1141 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001142 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001143 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001144 if (!item)
1145 goto error;
1146 status = PyList_Append(list, item);
1147 Py_DECREF(item);
1148 if (status < 0)
1149 goto error;
1150
1151 } else if (i == b && i == e && n > 0)
1152 /* ignore empty match on latest position */
1153 goto next;
1154
1155 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001156 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001157 match = pattern_new_match(self, &state, 1);
1158 if (!match)
1159 goto error;
Victor Stinner559bb6a2016-08-22 22:48:54 +02001160 item = _PyObject_CallArg1(filter, match);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161 Py_DECREF(match);
1162 if (!item)
1163 goto error;
1164 } else {
1165 /* filter is literal string */
1166 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001167 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001168 }
1169
1170 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001171 if (item != Py_None) {
1172 status = PyList_Append(list, item);
1173 Py_DECREF(item);
1174 if (status < 0)
1175 goto error;
1176 }
Tim Peters3d563502006-01-21 02:47:53 +00001177
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001178 i = e;
1179 n = n + 1;
1180
1181next:
1182 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001183 if (state.ptr == state.end)
1184 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001185 if (state.ptr == state.start)
1186 state.start = (void*) ((char*) state.ptr + state.charsize);
1187 else
1188 state.start = state.ptr;
1189
1190 }
1191
1192 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001193 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001194 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001195 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001196 if (!item)
1197 goto error;
1198 status = PyList_Append(list, item);
1199 Py_DECREF(item);
1200 if (status < 0)
1201 goto error;
1202 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001203
1204 state_fini(&state);
1205
Guido van Rossum4e173842001-12-07 04:25:10 +00001206 Py_DECREF(filter);
1207
Fredrik Lundhdac58492001-10-21 21:48:30 +00001208 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001209 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001210 if (!joiner) {
1211 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001212 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001213 }
1214 if (PyList_GET_SIZE(list) == 0) {
1215 Py_DECREF(list);
1216 item = joiner;
1217 }
1218 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001219 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001220 item = _PyBytes_Join(joiner, list);
1221 else
1222 item = PyUnicode_Join(joiner, list);
1223 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001224 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001225 if (!item)
1226 return NULL;
1227 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001228
1229 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001230 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001231
1232 return item;
1233
1234error:
1235 Py_DECREF(list);
1236 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001237 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001238 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001239
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001240}
1241
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001242/*[clinic input]
1243_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001244
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001245 repl: object
1246 string: object
1247 count: Py_ssize_t = 0
1248
1249Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1250[clinic start generated code]*/
1251
1252static PyObject *
1253_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1254 PyObject *string, Py_ssize_t count)
1255/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1256{
1257 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001258}
1259
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001260/*[clinic input]
1261_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001262
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001263 repl: object
1264 string: object
1265 count: Py_ssize_t = 0
1266
1267Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1268[clinic start generated code]*/
1269
1270static PyObject *
1271_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1272 PyObject *string, Py_ssize_t count)
1273/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1274{
1275 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001276}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001277
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001278/*[clinic input]
1279_sre.SRE_Pattern.__copy__
1280
1281[clinic start generated code]*/
1282
1283static PyObject *
1284_sre_SRE_Pattern___copy___impl(PatternObject *self)
1285/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001286{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001287#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001288 PatternObject* copy;
1289 int offset;
1290
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001291 copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1292 if (!copy)
1293 return NULL;
1294
1295 offset = offsetof(PatternObject, groups);
1296
1297 Py_XINCREF(self->groupindex);
1298 Py_XINCREF(self->indexgroup);
1299 Py_XINCREF(self->pattern);
1300
1301 memcpy((char*) copy + offset, (char*) self + offset,
1302 sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
Raymond Hettinger027bb632004-05-31 03:09:25 +00001303 copy->weakreflist = NULL;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001304
1305 return (PyObject*) copy;
1306#else
1307 PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1308 return NULL;
1309#endif
1310}
1311
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001312/*[clinic input]
1313_sre.SRE_Pattern.__deepcopy__
1314
1315 memo: object
1316
1317[clinic start generated code]*/
1318
1319static PyObject *
1320_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1321/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001322{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001323#ifdef USE_BUILTIN_COPY
1324 PatternObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00001325
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001326 copy = (PatternObject*) pattern_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001327 if (!copy)
1328 return NULL;
1329
1330 if (!deepcopy(&copy->groupindex, memo) ||
1331 !deepcopy(&copy->indexgroup, memo) ||
1332 !deepcopy(&copy->pattern, memo)) {
1333 Py_DECREF(copy);
1334 return NULL;
1335 }
1336
1337#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001338 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1339 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00001340#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001341}
1342
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001343static PyObject *
1344pattern_repr(PatternObject *obj)
1345{
1346 static const struct {
1347 const char *name;
1348 int value;
1349 } flag_names[] = {
1350 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1351 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1352 {"re.LOCALE", SRE_FLAG_LOCALE},
1353 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1354 {"re.DOTALL", SRE_FLAG_DOTALL},
1355 {"re.UNICODE", SRE_FLAG_UNICODE},
1356 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1357 {"re.DEBUG", SRE_FLAG_DEBUG},
1358 {"re.ASCII", SRE_FLAG_ASCII},
1359 };
1360 PyObject *result = NULL;
1361 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001362 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001363 int flags = obj->flags;
1364
1365 /* Omit re.UNICODE for valid string patterns. */
1366 if (obj->isbytes == 0 &&
1367 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1368 SRE_FLAG_UNICODE)
1369 flags &= ~SRE_FLAG_UNICODE;
1370
1371 flag_items = PyList_New(0);
1372 if (!flag_items)
1373 return NULL;
1374
1375 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1376 if (flags & flag_names[i].value) {
1377 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1378 if (!item)
1379 goto done;
1380
1381 if (PyList_Append(flag_items, item) < 0) {
1382 Py_DECREF(item);
1383 goto done;
1384 }
1385 Py_DECREF(item);
1386 flags &= ~flag_names[i].value;
1387 }
1388 }
1389 if (flags) {
1390 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1391 if (!item)
1392 goto done;
1393
1394 if (PyList_Append(flag_items, item) < 0) {
1395 Py_DECREF(item);
1396 goto done;
1397 }
1398 Py_DECREF(item);
1399 }
1400
1401 if (PyList_Size(flag_items) > 0) {
1402 PyObject *flags_result;
1403 PyObject *sep = PyUnicode_FromString("|");
1404 if (!sep)
1405 goto done;
1406 flags_result = PyUnicode_Join(sep, flag_items);
1407 Py_DECREF(sep);
1408 if (!flags_result)
1409 goto done;
1410 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1411 obj->pattern, flags_result);
1412 Py_DECREF(flags_result);
1413 }
1414 else {
1415 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1416 }
1417
1418done:
1419 Py_DECREF(flag_items);
1420 return result;
1421}
1422
Raymond Hettinger94478742004-09-24 04:31:19 +00001423PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1424
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001425/* PatternObject's 'groupindex' method. */
1426static PyObject *
1427pattern_groupindex(PatternObject *self)
1428{
1429 return PyDictProxy_New(self->groupindex);
1430}
1431
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001432static int _validate(PatternObject *self); /* Forward */
1433
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001434/*[clinic input]
1435_sre.compile
1436
1437 pattern: object
1438 flags: int
1439 code: object(subclass_of='&PyList_Type')
1440 groups: Py_ssize_t
1441 groupindex: object
1442 indexgroup: object
1443
1444[clinic start generated code]*/
1445
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001446static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001447_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001448 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1449 PyObject *indexgroup)
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001450/*[clinic end generated code: output=ef9c2b3693776404 input=7d059ec8ae1edb85]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001451{
1452 /* "compile" pattern descriptor to pattern object */
1453
1454 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001455 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001456
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001457 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001458 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001459 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1460 if (!self)
1461 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001462 self->weakreflist = NULL;
1463 self->pattern = NULL;
1464 self->groupindex = NULL;
1465 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001466
1467 self->codesize = n;
1468
1469 for (i = 0; i < n; i++) {
1470 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001471 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001472 self->code[i] = (SRE_CODE) value;
1473 if ((unsigned long) self->code[i] != value) {
1474 PyErr_SetString(PyExc_OverflowError,
1475 "regular expression code size limit exceeded");
1476 break;
1477 }
1478 }
1479
1480 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001481 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001482 return NULL;
1483 }
1484
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001485 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001486 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001487 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001488 else {
1489 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001490 int charsize;
1491 Py_buffer view;
1492 view.buf = NULL;
1493 if (!getstring(pattern, &p_length, &self->isbytes,
1494 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001495 Py_DECREF(self);
1496 return NULL;
1497 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001498 if (view.buf)
1499 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001500 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001501
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001502 Py_INCREF(pattern);
1503 self->pattern = pattern;
1504
1505 self->flags = flags;
1506
1507 self->groups = groups;
1508
1509 Py_XINCREF(groupindex);
1510 self->groupindex = groupindex;
1511
1512 Py_XINCREF(indexgroup);
1513 self->indexgroup = indexgroup;
1514
1515 self->weakreflist = NULL;
1516
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001517 if (!_validate(self)) {
1518 Py_DECREF(self);
1519 return NULL;
1520 }
1521
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001522 return (PyObject*) self;
1523}
1524
Guido van Rossumb700df92000-03-31 14:59:30 +00001525/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001526/* Code validation */
1527
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1549
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesised
   printf argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: returns 0 from the enclosing validator function */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros rely on variables of the enclosing function: the cursor
   `code`, the limit `end`, and the destination `op`, `arg` or `skip`.
   Each one FAILs if the cursor has already reached `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and check that (skip - adj) words, counted from the
   skip word itself, stay within `end`.  `adj` is parenthesised so that any
   expression may be passed safely. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001590
1591static int
1592_validate_charset(SRE_CODE *code, SRE_CODE *end)
1593{
1594 /* Some variables are manipulated by the macros above */
1595 SRE_CODE op;
1596 SRE_CODE arg;
1597 SRE_CODE offset;
1598 int i;
1599
1600 while (code < end) {
1601 GET_OP;
1602 switch (op) {
1603
1604 case SRE_OP_NEGATE:
1605 break;
1606
1607 case SRE_OP_LITERAL:
1608 GET_ARG;
1609 break;
1610
1611 case SRE_OP_RANGE:
Serhiy Storchaka4b8f8942014-10-31 12:36:56 +02001612 case SRE_OP_RANGE_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001613 GET_ARG;
1614 GET_ARG;
1615 break;
1616
1617 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001618 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001619 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001620 FAIL;
1621 code += offset;
1622 break;
1623
1624 case SRE_OP_BIGCHARSET:
1625 GET_ARG; /* Number of blocks */
1626 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001627 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001628 FAIL;
1629 /* Make sure that each byte points to a valid block */
1630 for (i = 0; i < 256; i++) {
1631 if (((unsigned char *)code)[i] >= arg)
1632 FAIL;
1633 }
1634 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001635 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001636 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001637 FAIL;
1638 code += offset;
1639 break;
1640
1641 case SRE_OP_CATEGORY:
1642 GET_ARG;
1643 switch (arg) {
1644 case SRE_CATEGORY_DIGIT:
1645 case SRE_CATEGORY_NOT_DIGIT:
1646 case SRE_CATEGORY_SPACE:
1647 case SRE_CATEGORY_NOT_SPACE:
1648 case SRE_CATEGORY_WORD:
1649 case SRE_CATEGORY_NOT_WORD:
1650 case SRE_CATEGORY_LINEBREAK:
1651 case SRE_CATEGORY_NOT_LINEBREAK:
1652 case SRE_CATEGORY_LOC_WORD:
1653 case SRE_CATEGORY_LOC_NOT_WORD:
1654 case SRE_CATEGORY_UNI_DIGIT:
1655 case SRE_CATEGORY_UNI_NOT_DIGIT:
1656 case SRE_CATEGORY_UNI_SPACE:
1657 case SRE_CATEGORY_UNI_NOT_SPACE:
1658 case SRE_CATEGORY_UNI_WORD:
1659 case SRE_CATEGORY_UNI_NOT_WORD:
1660 case SRE_CATEGORY_UNI_LINEBREAK:
1661 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1662 break;
1663 default:
1664 FAIL;
1665 }
1666 break;
1667
1668 default:
1669 FAIL;
1670
1671 }
1672 }
1673
1674 return 1;
1675}
1676
1677static int
1678_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1679{
1680 /* Some variables are manipulated by the macros above */
1681 SRE_CODE op;
1682 SRE_CODE arg;
1683 SRE_CODE skip;
1684
1685 VTRACE(("code=%p, end=%p\n", code, end));
1686
1687 if (code > end)
1688 FAIL;
1689
1690 while (code < end) {
1691 GET_OP;
1692 switch (op) {
1693
1694 case SRE_OP_MARK:
1695 /* We don't check whether marks are properly nested; the
1696 sre_match() code is robust even if they don't, and the worst
1697 you can get is nonsensical match results. */
1698 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001699 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001700 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1701 FAIL;
1702 }
1703 break;
1704
1705 case SRE_OP_LITERAL:
1706 case SRE_OP_NOT_LITERAL:
1707 case SRE_OP_LITERAL_IGNORE:
1708 case SRE_OP_NOT_LITERAL_IGNORE:
1709 GET_ARG;
1710 /* The arg is just a character, nothing to check */
1711 break;
1712
1713 case SRE_OP_SUCCESS:
1714 case SRE_OP_FAILURE:
1715 /* Nothing to check; these normally end the matching process */
1716 break;
1717
1718 case SRE_OP_AT:
1719 GET_ARG;
1720 switch (arg) {
1721 case SRE_AT_BEGINNING:
1722 case SRE_AT_BEGINNING_STRING:
1723 case SRE_AT_BEGINNING_LINE:
1724 case SRE_AT_END:
1725 case SRE_AT_END_LINE:
1726 case SRE_AT_END_STRING:
1727 case SRE_AT_BOUNDARY:
1728 case SRE_AT_NON_BOUNDARY:
1729 case SRE_AT_LOC_BOUNDARY:
1730 case SRE_AT_LOC_NON_BOUNDARY:
1731 case SRE_AT_UNI_BOUNDARY:
1732 case SRE_AT_UNI_NON_BOUNDARY:
1733 break;
1734 default:
1735 FAIL;
1736 }
1737 break;
1738
1739 case SRE_OP_ANY:
1740 case SRE_OP_ANY_ALL:
1741 /* These have no operands */
1742 break;
1743
1744 case SRE_OP_IN:
1745 case SRE_OP_IN_IGNORE:
1746 GET_SKIP;
1747 /* Stop 1 before the end; we check the FAILURE below */
1748 if (!_validate_charset(code, code+skip-2))
1749 FAIL;
1750 if (code[skip-2] != SRE_OP_FAILURE)
1751 FAIL;
1752 code += skip-1;
1753 break;
1754
1755 case SRE_OP_INFO:
1756 {
1757 /* A minimal info field is
1758 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1759 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1760 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001761 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001762 SRE_CODE *newcode;
1763 GET_SKIP;
1764 newcode = code+skip-1;
1765 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001766 GET_ARG;
1767 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001768 /* Check that only valid flags are present */
1769 if ((flags & ~(SRE_INFO_PREFIX |
1770 SRE_INFO_LITERAL |
1771 SRE_INFO_CHARSET)) != 0)
1772 FAIL;
1773 /* PREFIX and CHARSET are mutually exclusive */
1774 if ((flags & SRE_INFO_PREFIX) &&
1775 (flags & SRE_INFO_CHARSET))
1776 FAIL;
1777 /* LITERAL implies PREFIX */
1778 if ((flags & SRE_INFO_LITERAL) &&
1779 !(flags & SRE_INFO_PREFIX))
1780 FAIL;
1781 /* Validate the prefix */
1782 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001783 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001784 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001785 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001786 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001787 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001788 FAIL;
1789 code += prefix_len;
1790 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001791 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001792 FAIL;
1793 /* Each overlap value should be < prefix_len */
1794 for (i = 0; i < prefix_len; i++) {
1795 if (code[i] >= prefix_len)
1796 FAIL;
1797 }
1798 code += prefix_len;
1799 }
1800 /* Validate the charset */
1801 if (flags & SRE_INFO_CHARSET) {
1802 if (!_validate_charset(code, newcode-1))
1803 FAIL;
1804 if (newcode[-1] != SRE_OP_FAILURE)
1805 FAIL;
1806 code = newcode;
1807 }
1808 else if (code != newcode) {
1809 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1810 FAIL;
1811 }
1812 }
1813 break;
1814
1815 case SRE_OP_BRANCH:
1816 {
1817 SRE_CODE *target = NULL;
1818 for (;;) {
1819 GET_SKIP;
1820 if (skip == 0)
1821 break;
1822 /* Stop 2 before the end; we check the JUMP below */
1823 if (!_validate_inner(code, code+skip-3, groups))
1824 FAIL;
1825 code += skip-3;
1826 /* Check that it ends with a JUMP, and that each JUMP
1827 has the same target */
1828 GET_OP;
1829 if (op != SRE_OP_JUMP)
1830 FAIL;
1831 GET_SKIP;
1832 if (target == NULL)
1833 target = code+skip-1;
1834 else if (code+skip-1 != target)
1835 FAIL;
1836 }
1837 }
1838 break;
1839
1840 case SRE_OP_REPEAT_ONE:
1841 case SRE_OP_MIN_REPEAT_ONE:
1842 {
1843 SRE_CODE min, max;
1844 GET_SKIP;
1845 GET_ARG; min = arg;
1846 GET_ARG; max = arg;
1847 if (min > max)
1848 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001849 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001850 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001851 if (!_validate_inner(code, code+skip-4, groups))
1852 FAIL;
1853 code += skip-4;
1854 GET_OP;
1855 if (op != SRE_OP_SUCCESS)
1856 FAIL;
1857 }
1858 break;
1859
1860 case SRE_OP_REPEAT:
1861 {
1862 SRE_CODE min, max;
1863 GET_SKIP;
1864 GET_ARG; min = arg;
1865 GET_ARG; max = arg;
1866 if (min > max)
1867 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001868 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001869 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001870 if (!_validate_inner(code, code+skip-3, groups))
1871 FAIL;
1872 code += skip-3;
1873 GET_OP;
1874 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1875 FAIL;
1876 }
1877 break;
1878
1879 case SRE_OP_GROUPREF:
1880 case SRE_OP_GROUPREF_IGNORE:
1881 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001882 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001883 FAIL;
1884 break;
1885
1886 case SRE_OP_GROUPREF_EXISTS:
1887 /* The regex syntax for this is: '(?(group)then|else)', where
1888 'group' is either an integer group number or a group name,
1889 'then' and 'else' are sub-regexes, and 'else' is optional. */
1890 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001891 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001892 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001893 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001894 code--; /* The skip is relative to the first arg! */
1895 /* There are two possibilities here: if there is both a 'then'
1896 part and an 'else' part, the generated code looks like:
1897
1898 GROUPREF_EXISTS
1899 <group>
1900 <skipyes>
1901 ...then part...
1902 JUMP
1903 <skipno>
1904 (<skipyes> jumps here)
1905 ...else part...
1906 (<skipno> jumps here)
1907
1908 If there is only a 'then' part, it looks like:
1909
1910 GROUPREF_EXISTS
1911 <group>
1912 <skip>
1913 ...then part...
1914 (<skip> jumps here)
1915
1916 There is no direct way to decide which it is, and we don't want
1917 to allow arbitrary jumps anywhere in the code; so we just look
1918 for a JUMP opcode preceding our skip target.
1919 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001920 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001921 code[skip-3] == SRE_OP_JUMP)
1922 {
1923 VTRACE(("both then and else parts present\n"));
1924 if (!_validate_inner(code+1, code+skip-3, groups))
1925 FAIL;
1926 code += skip-2; /* Position after JUMP, at <skipno> */
1927 GET_SKIP;
1928 if (!_validate_inner(code, code+skip-1, groups))
1929 FAIL;
1930 code += skip-1;
1931 }
1932 else {
1933 VTRACE(("only a then part present\n"));
1934 if (!_validate_inner(code+1, code+skip-1, groups))
1935 FAIL;
1936 code += skip-1;
1937 }
1938 break;
1939
1940 case SRE_OP_ASSERT:
1941 case SRE_OP_ASSERT_NOT:
1942 GET_SKIP;
1943 GET_ARG; /* 0 for lookahead, width for lookbehind */
1944 code--; /* Back up over arg to simplify math below */
1945 if (arg & 0x80000000)
1946 FAIL; /* Width too large */
1947 /* Stop 1 before the end; we check the SUCCESS below */
1948 if (!_validate_inner(code+1, code+skip-2, groups))
1949 FAIL;
1950 code += skip-2;
1951 GET_OP;
1952 if (op != SRE_OP_SUCCESS)
1953 FAIL;
1954 break;
1955
1956 default:
1957 FAIL;
1958
1959 }
1960 }
1961
1962 VTRACE(("okay\n"));
1963 return 1;
1964}
1965
1966static int
1967_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1968{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001969 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1970 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001971 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001972 return _validate_inner(code, end-1, groups);
1973}
1974
1975static int
1976_validate(PatternObject *self)
1977{
1978 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1979 {
1980 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1981 return 0;
1982 }
1983 else
1984 VTRACE(("Success!\n"));
1985 return 1;
1986}
1987
1988/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001989/* match methods */
1990
1991static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001992match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001993{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001994 Py_XDECREF(self->regs);
1995 Py_XDECREF(self->string);
1996 Py_DECREF(self->pattern);
1997 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001998}
1999
2000static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002001match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00002002{
Serhiy Storchaka25324972013-10-16 12:46:28 +03002003 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002004 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03002005 Py_buffer view;
2006 PyObject *result;
2007 void* ptr;
2008
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002009 if (index < 0 || index >= self->groups) {
2010 /* raise IndexError if we were given a bad group number */
2011 PyErr_SetString(
2012 PyExc_IndexError,
2013 "no such group"
2014 );
2015 return NULL;
2016 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002017
Fredrik Lundh6f013982000-07-03 18:44:21 +00002018 index *= 2;
2019
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002020 if (self->string == Py_None || self->mark[index] < 0) {
2021 /* return default value if the string or group is undefined */
2022 Py_INCREF(def);
2023 return def;
2024 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002025
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002026 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03002027 if (ptr == NULL)
2028 return NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002029 result = getslice(isbytes, ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +03002030 self->string, self->mark[index], self->mark[index+1]);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002031 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03002032 PyBuffer_Release(&view);
2033 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002034}
2035
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002036static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002037match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00002038{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002039 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00002040
Guido van Rossumddefaf32007-01-14 03:31:43 +00002041 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002042 /* Default value */
2043 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00002044
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03002045 if (PyIndex_Check(index)) {
2046 return PyNumber_AsSsize_t(index, NULL);
2047 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002048
Fredrik Lundh6f013982000-07-03 18:44:21 +00002049 i = -1;
2050
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002051 if (self->pattern->groupindex) {
2052 index = PyObject_GetItem(self->pattern->groupindex, index);
2053 if (index) {
Neal Norwitz1fe5f382007-08-31 04:32:55 +00002054 if (PyLong_Check(index))
Christian Heimes217cfd12007-12-02 14:31:20 +00002055 i = PyLong_AsSsize_t(index);
Fredrik Lundh6f013982000-07-03 18:44:21 +00002056 Py_DECREF(index);
2057 } else
2058 PyErr_Clear();
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002059 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00002060
2061 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002062}
2063
2064static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002065match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002066{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002067 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00002068}
2069
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002070/*[clinic input]
2071_sre.SRE_Match.expand
2072
2073 template: object
2074
2075Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2076[clinic start generated code]*/
2077
2078static PyObject *
2079_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2080/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002081{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002082 /* delegate to Python code */
2083 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002084 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002085 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002086 );
2087}
2088
2089static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002090match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002091{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002092 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002093 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002094
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002095 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002096
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002097 switch (size) {
2098 case 0:
2099 result = match_getslice(self, Py_False, Py_None);
2100 break;
2101 case 1:
2102 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2103 break;
2104 default:
2105 /* fetch multiple items */
2106 result = PyTuple_New(size);
2107 if (!result)
2108 return NULL;
2109 for (i = 0; i < size; i++) {
2110 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002111 self, PyTuple_GET_ITEM(args, i), Py_None
2112 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002113 if (!item) {
2114 Py_DECREF(result);
2115 return NULL;
2116 }
2117 PyTuple_SET_ITEM(result, i, item);
2118 }
2119 break;
2120 }
2121 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002122}
2123
Eric V. Smith605bdae2016-09-11 08:55:43 -04002124static PyObject*
2125match_getitem(MatchObject* self, PyObject* name)
2126{
2127 return match_getslice(self, name, Py_None);
2128}
2129
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002130/*[clinic input]
2131_sre.SRE_Match.groups
2132
2133 default: object = None
2134 Is used for groups that did not participate in the match.
2135
2136Return a tuple containing all the subgroups of the match, from 1.
2137[clinic start generated code]*/
2138
2139static PyObject *
2140_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2141/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002142{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002143 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002144 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002145
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002146 result = PyTuple_New(self->groups-1);
2147 if (!result)
2148 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002149
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002150 for (index = 1; index < self->groups; index++) {
2151 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002152 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 if (!item) {
2154 Py_DECREF(result);
2155 return NULL;
2156 }
2157 PyTuple_SET_ITEM(result, index-1, item);
2158 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002159
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002160 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002161}
2162
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002163/*[clinic input]
2164_sre.SRE_Match.groupdict
2165
2166 default: object = None
2167 Is used for groups that did not participate in the match.
2168
2169Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2170[clinic start generated code]*/
2171
2172static PyObject *
2173_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2174/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002175{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002176 PyObject* result;
2177 PyObject* keys;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002178 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002179
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002180 result = PyDict_New();
2181 if (!result || !self->pattern->groupindex)
2182 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002183
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002184 keys = PyMapping_Keys(self->pattern->groupindex);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002185 if (!keys)
2186 goto failed;
Guido van Rossumb700df92000-03-31 14:59:30 +00002187
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002188 for (index = 0; index < PyList_GET_SIZE(keys); index++) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002189 int status;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002190 PyObject* key;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002191 PyObject* value;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002192 key = PyList_GET_ITEM(keys, index);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002193 if (!key)
2194 goto failed;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002195 value = match_getslice(self, key, default_value);
Benjamin Peterson3a27b082016-08-15 22:01:41 -07002196 if (!value)
Fredrik Lundh770617b2001-01-14 15:06:11 +00002197 goto failed;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002198 status = PyDict_SetItem(result, key, value);
2199 Py_DECREF(value);
2200 if (status < 0)
2201 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002202 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002203
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002204 Py_DECREF(keys);
Guido van Rossumb700df92000-03-31 14:59:30 +00002205
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002206 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002207
2208failed:
Neal Norwitz60da3162006-03-07 04:48:24 +00002209 Py_XDECREF(keys);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002210 Py_DECREF(result);
2211 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002212}
2213
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002214/*[clinic input]
2215_sre.SRE_Match.start -> Py_ssize_t
2216
2217 group: object(c_default="NULL") = 0
2218 /
2219
2220Return index of the start of the substring matched by group.
2221[clinic start generated code]*/
2222
2223static Py_ssize_t
2224_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2225/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002226{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002227 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002228
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002229 if (index < 0 || index >= self->groups) {
2230 PyErr_SetString(
2231 PyExc_IndexError,
2232 "no such group"
2233 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002234 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002235 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002236
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002237 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002238 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002239}
2240
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002241/*[clinic input]
2242_sre.SRE_Match.end -> Py_ssize_t
2243
2244 group: object(c_default="NULL") = 0
2245 /
2246
2247Return index of the end of the substring matched by group.
2248[clinic start generated code]*/
2249
2250static Py_ssize_t
2251_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2252/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002253{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002254 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002255
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002256 if (index < 0 || index >= self->groups) {
2257 PyErr_SetString(
2258 PyExc_IndexError,
2259 "no such group"
2260 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002261 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002262 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002263
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002264 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002265 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002266}
2267
2268LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002269_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002270{
2271 PyObject* pair;
2272 PyObject* item;
2273
2274 pair = PyTuple_New(2);
2275 if (!pair)
2276 return NULL;
2277
Christian Heimes217cfd12007-12-02 14:31:20 +00002278 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002279 if (!item)
2280 goto error;
2281 PyTuple_SET_ITEM(pair, 0, item);
2282
Christian Heimes217cfd12007-12-02 14:31:20 +00002283 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002284 if (!item)
2285 goto error;
2286 PyTuple_SET_ITEM(pair, 1, item);
2287
2288 return pair;
2289
2290 error:
2291 Py_DECREF(pair);
2292 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002293}
2294
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002295/*[clinic input]
2296_sre.SRE_Match.span
2297
2298 group: object(c_default="NULL") = 0
2299 /
2300
2301For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2302[clinic start generated code]*/
2303
2304static PyObject *
2305_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2306/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002307{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002308 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002309
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002310 if (index < 0 || index >= self->groups) {
2311 PyErr_SetString(
2312 PyExc_IndexError,
2313 "no such group"
2314 );
2315 return NULL;
2316 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002317
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002318 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002319 return _pair(self->mark[index*2], self->mark[index*2+1]);
2320}
2321
2322static PyObject*
2323match_regs(MatchObject* self)
2324{
2325 PyObject* regs;
2326 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002327 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002328
2329 regs = PyTuple_New(self->groups);
2330 if (!regs)
2331 return NULL;
2332
2333 for (index = 0; index < self->groups; index++) {
2334 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2335 if (!item) {
2336 Py_DECREF(regs);
2337 return NULL;
2338 }
2339 PyTuple_SET_ITEM(regs, index, item);
2340 }
2341
2342 Py_INCREF(regs);
2343 self->regs = regs;
2344
2345 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002346}
2347
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002348/*[clinic input]
2349_sre.SRE_Match.__copy__
2350
2351[clinic start generated code]*/
2352
2353static PyObject *
2354_sre_SRE_Match___copy___impl(MatchObject *self)
2355/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002356{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002357#ifdef USE_BUILTIN_COPY
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002358 MatchObject* copy;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002359 Py_ssize_t slots, offset;
Tim Peters3d563502006-01-21 02:47:53 +00002360
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002361 slots = 2 * (self->pattern->groups+1);
2362
2363 copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2364 if (!copy)
2365 return NULL;
2366
2367 /* this value a constant, but any compiler should be able to
2368 figure that out all by itself */
2369 offset = offsetof(MatchObject, string);
2370
2371 Py_XINCREF(self->pattern);
2372 Py_XINCREF(self->string);
2373 Py_XINCREF(self->regs);
2374
2375 memcpy((char*) copy + offset, (char*) self + offset,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002376 sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002377
2378 return (PyObject*) copy;
2379#else
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002380 PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002381 return NULL;
2382#endif
2383}
2384
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002385/*[clinic input]
2386_sre.SRE_Match.__deepcopy__
2387
2388 memo: object
2389
2390[clinic start generated code]*/
2391
2392static PyObject *
2393_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2394/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002395{
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002396#ifdef USE_BUILTIN_COPY
2397 MatchObject* copy;
Tim Peters3d563502006-01-21 02:47:53 +00002398
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002399 copy = (MatchObject*) match_copy(self);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002400 if (!copy)
2401 return NULL;
2402
2403 if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2404 !deepcopy(&copy->string, memo) ||
2405 !deepcopy(&copy->regs, memo)) {
2406 Py_DECREF(copy);
2407 return NULL;
2408 }
2409
2410#else
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002411 PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2412 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +00002413#endif
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002414}
2415
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002416PyDoc_STRVAR(match_doc,
2417"The result of re.match() and re.search().\n\
2418Match objects always have a boolean value of True.");
2419
2420PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002421"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002422 Return subgroup(s) of the match by indices or names.\n\
2423 For 0 returns the entire match.");
2424
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002425static PyObject *
2426match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002427{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002428 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002429 return PyLong_FromSsize_t(self->lastindex);
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002430 Py_INCREF(Py_None);
2431 return Py_None;
Guido van Rossumb700df92000-03-31 14:59:30 +00002432}
2433
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002434static PyObject *
2435match_lastgroup_get(MatchObject *self)
2436{
2437 if (self->pattern->indexgroup && self->lastindex >= 0) {
2438 PyObject* result = PySequence_GetItem(
2439 self->pattern->indexgroup, self->lastindex
2440 );
2441 if (result)
2442 return result;
2443 PyErr_Clear();
2444 }
2445 Py_INCREF(Py_None);
2446 return Py_None;
2447}
2448
2449static PyObject *
2450match_regs_get(MatchObject *self)
2451{
2452 if (self->regs) {
2453 Py_INCREF(self->regs);
2454 return self->regs;
2455 } else
2456 return match_regs(self);
2457}
2458
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002459static PyObject *
2460match_repr(MatchObject *self)
2461{
2462 PyObject *result;
2463 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2464 if (group0 == NULL)
2465 return NULL;
2466 result = PyUnicode_FromFormat(
2467 "<%s object; span=(%d, %d), match=%.50R>",
2468 Py_TYPE(self)->tp_name,
2469 self->mark[0], self->mark[1], group0);
2470 Py_DECREF(group0);
2471 return result;
2472}
2473
2474
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002475static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002476pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002477{
2478 /* create match object (from state object) */
2479
2480 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002481 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002482 char* base;
2483 int n;
2484
2485 if (status > 0) {
2486
2487 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002488 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002489 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2490 2*(pattern->groups+1));
2491 if (!match)
2492 return NULL;
2493
2494 Py_INCREF(pattern);
2495 match->pattern = pattern;
2496
2497 Py_INCREF(state->string);
2498 match->string = state->string;
2499
2500 match->regs = NULL;
2501 match->groups = pattern->groups+1;
2502
2503 /* fill in group slices */
2504
2505 base = (char*) state->beginning;
2506 n = state->charsize;
2507
2508 match->mark[0] = ((char*) state->start - base) / n;
2509 match->mark[1] = ((char*) state->ptr - base) / n;
2510
2511 for (i = j = 0; i < pattern->groups; i++, j+=2)
2512 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2513 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2514 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2515 } else
2516 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2517
2518 match->pos = state->pos;
2519 match->endpos = state->endpos;
2520
2521 match->lastindex = state->lastindex;
2522
2523 return (PyObject*) match;
2524
2525 } else if (status == 0) {
2526
2527 /* no match */
2528 Py_INCREF(Py_None);
2529 return Py_None;
2530
2531 }
2532
2533 /* internal error */
2534 pattern_error(status);
2535 return NULL;
2536}
2537
2538
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002539/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002540/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002541
/* Destructor for scanner objects: finalize the embedded search state,
   drop the reference to the pattern, then free the object itself. */
static void
scanner_dealloc(ScannerObject* self)
{
    state_fini(&self->state);
    /* pattern may still be NULL if pattern_scanner() failed part-way */
    Py_XDECREF(self->pattern);
    PyObject_DEL(self);
}
2549
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002550/*[clinic input]
2551_sre.SRE_Scanner.match
2552
2553[clinic start generated code]*/
2554
2555static PyObject *
2556_sre_SRE_Scanner_match_impl(ScannerObject *self)
2557/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002558{
2559 SRE_STATE* state = &self->state;
2560 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002561 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002562
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002563 if (state->start == NULL)
2564 Py_RETURN_NONE;
2565
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002566 state_reset(state);
2567
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002568 state->ptr = state->start;
2569
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002570 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002571 if (PyErr_Occurred())
2572 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002573
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002574 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002575 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002576
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002577 if (status == 0)
2578 state->start = NULL;
2579 else if (state->ptr != state->start)
2580 state->start = state->ptr;
2581 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002582 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002583 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002584 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002585
2586 return match;
2587}
2588
2589
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002590/*[clinic input]
2591_sre.SRE_Scanner.search
2592
2593[clinic start generated code]*/
2594
2595static PyObject *
2596_sre_SRE_Scanner_search_impl(ScannerObject *self)
2597/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002598{
2599 SRE_STATE* state = &self->state;
2600 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002601 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002602
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002603 if (state->start == NULL)
2604 Py_RETURN_NONE;
2605
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002606 state_reset(state);
2607
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002608 state->ptr = state->start;
2609
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002610 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002611 if (PyErr_Occurred())
2612 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002613
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002614 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002615 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002616
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002617 if (status == 0)
2618 state->start = NULL;
2619 else if (state->ptr != state->start)
2620 state->start = state->ptr;
2621 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002622 state->start = (void*) ((char*) state->ptr + state->charsize);
2623 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002624 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002625
2626 return match;
2627}
2628
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002629static PyObject *
2630pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002631{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002632 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002633
2634 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002635 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2636 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002637 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002638 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002639
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002640 /* create search state object */
2641 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2642 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002643 return NULL;
2644 }
2645
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002646 Py_INCREF(self);
2647 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002648
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002649 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002650}
2651
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002652#include "clinic/_sre.c.h"
2653
/* Method table for pattern objects (installed as Pattern_Type.tp_methods).
   Each entry is a *_METHODDEF macro; presumably these come from the
   Argument Clinic output in clinic/_sre.c.h included above -- confirm.
   The {NULL, NULL} entry terminates the table. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    {NULL, NULL}
};
2668
/* Computed attributes of pattern objects (Pattern_Type.tp_getset).
   "groupindex" has a getter only, so it cannot be assigned to. */
static PyGetSetDef pattern_getset[] = {
    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
      "A dictionary mapping group names to group numbers."},
    {NULL}  /* Sentinel */
};
2674
/* Read-only attributes of pattern objects that are served straight from
   PatternObject fields (Pattern_Type.tp_members). */
#define PAT_OFF(x) offsetof(PatternObject, x)
static PyMemberDef pattern_members[] = {
    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
    {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
    {NULL}  /* Sentinel */
};
2682
/* Type object for compiled patterns.  Instances are variable-sized: the
   item size is sizeof(SRE_CODE).  The type supports weak references via
   the weakreflist field of PatternObject. */
static PyTypeObject Pattern_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Pattern",
    sizeof(PatternObject), sizeof(SRE_CODE), /* tp_basicsize, tp_itemsize */
    (destructor)pattern_dealloc,        /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    (reprfunc)pattern_repr,             /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    pattern_doc,                        /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    pattern_methods,                    /* tp_methods */
    pattern_members,                    /* tp_members */
    pattern_getset,                     /* tp_getset */
};
2714
/* Match objects do not support length or assignment, but do support
   __getitem__.  Only the subscript slot is filled in; it is served by
   match_getitem. */
static PyMappingMethods match_as_mapping = {
    NULL,                       /* mp_length */
    (binaryfunc)match_getitem,  /* mp_subscript */
    NULL                        /* mp_ass_subscript */
};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002722
/* Method table for match objects (installed as Match_Type.tp_methods).
   "group" is declared by hand as a METH_VARARGS method; the remaining
   entries are *_METHODDEF macros (presumably from clinic/_sre.c.h --
   confirm).  The {NULL, NULL} entry terminates the table. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    {NULL, NULL}
};
2735
/* Computed attributes of match objects (Match_Type.tp_getset).  All three
   have a getter only, so none of them can be assigned to. */
static PyGetSetDef match_getset[] = {
    {"lastindex", (getter)match_lastindex_get, (setter)NULL},
    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
    {"regs",      (getter)match_regs_get,      (setter)NULL},
    {NULL}  /* Sentinel */
};
2742
/* Read-only attributes of match objects that are served straight from
   MatchObject fields (Match_Type.tp_members).  Note that the Python-level
   name "re" maps to the C field "pattern". */
#define MATCH_OFF(x) offsetof(MatchObject, x)
static PyMemberDef match_members[] = {
    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
    {NULL}  /* Sentinel */
};
2751
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2754
/* Type object for match objects.  Instances are variable-sized: the item
   size is sizeof(Py_ssize_t), one item per mark slot.  Subscripting is
   provided through match_as_mapping. */
static PyTypeObject Match_Type = {
    PyVarObject_HEAD_INIT(NULL,0)
    "_" SRE_MODULE ".SRE_Match",
    sizeof(MatchObject), sizeof(Py_ssize_t), /* tp_basicsize, tp_itemsize */
    (destructor)match_dealloc,  /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_reserved */
    (reprfunc)match_repr,       /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    &match_as_mapping,          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    match_doc,                  /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    match_methods,              /* tp_methods */
    match_members,              /* tp_members */
    match_getset,               /* tp_getset */
};
2786
/* Method table for scanner objects (installed as Scanner_Type.tp_methods).
   Both entries are *_METHODDEF macros (presumably from clinic/_sre.c.h --
   confirm).  The {NULL, NULL} entry terminates the table. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}
};
2792
/* Read-only attributes of scanner objects that are served straight from
   ScannerObject fields (Scanner_Type.tp_members). */
#define SCAN_OFF(x) offsetof(ScannerObject, x)
static PyMemberDef scanner_members[] = {
    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
    {NULL}  /* Sentinel */
};
2798
/* Type object for scanner objects.  Instances have a fixed size (item
   size 0); the type has no repr, docstring or getset table of its own. */
static PyTypeObject Scanner_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_" SRE_MODULE ".SRE_Scanner",
    sizeof(ScannerObject), 0,   /* tp_basicsize, tp_itemsize */
    (destructor)scanner_dealloc,/* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_reserved */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,         /* tp_flags */
    0,                          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    scanner_methods,            /* tp_methods */
    scanner_members,            /* tp_members */
    0,                          /* tp_getset */
};
2830
/* Module-level function table, referenced from the sremodule definition.
   The entries are *_METHODDEF macros (presumably from clinic/_sre.c.h --
   confirm).  The {NULL, NULL} entry terminates the table. */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_GETLOWER_METHODDEF
    {NULL, NULL}
};
2837
/* Module definition passed to PyModule_Create() in PyInit__sre().  Only
   the name, the size and the function table are filled in. */
static struct PyModuleDef sremodule = {
    PyModuleDef_HEAD_INIT,
    "_" SRE_MODULE,     /* m_name */
    NULL,               /* m_doc */
    -1,                 /* m_size */
    _functions,         /* m_methods */
    NULL,               /* presumably m_slots (m_reload in older headers) */
    NULL,               /* m_traverse */
    NULL,               /* m_clear */
    NULL                /* m_free */
};
2849
2850PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002851{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002852 PyObject* m;
2853 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002854 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002855
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002856 /* Patch object types */
2857 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2858 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002859 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002860
Martin v. Löwis1a214512008-06-11 05:26:20 +00002861 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002862 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002863 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002864 d = PyModule_GetDict(m);
2865
Christian Heimes217cfd12007-12-02 14:31:20 +00002866 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002867 if (x) {
2868 PyDict_SetItemString(d, "MAGIC", x);
2869 Py_DECREF(x);
2870 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002871
Christian Heimes217cfd12007-12-02 14:31:20 +00002872 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002873 if (x) {
2874 PyDict_SetItemString(d, "CODESIZE", x);
2875 Py_DECREF(x);
2876 }
2877
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002878 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2879 if (x) {
2880 PyDict_SetItemString(d, "MAXREPEAT", x);
2881 Py_DECREF(x);
2882 }
2883
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002884 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2885 if (x) {
2886 PyDict_SetItemString(d, "MAXGROUPS", x);
2887 Py_DECREF(x);
2888 }
2889
Neal Norwitzfe537132007-08-26 03:55:15 +00002890 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002891 if (x) {
2892 PyDict_SetItemString(d, "copyright", x);
2893 Py_DECREF(x);
2894 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002895 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002896}
2897
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002898/* vim:ts=4:sw=4:et
2899*/