blob: a9b6b50e84e69af661ac26b33319ffec82ad6516 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020038static const char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
Serhiy Storchaka9eabac62013-10-26 10:45:48 +030048#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
Fredrik Lundh436c3d582000-06-29 08:58:44 +000052/* name of this module, minus the leading underscore */
Fredrik Lundh1c5aa692001-01-16 07:37:30 +000053#if !defined(SRE_MODULE)
54#define SRE_MODULE "sre"
55#endif
Fredrik Lundh436c3d582000-06-29 08:58:44 +000056
Thomas Wouters9ada3d62006-04-21 09:47:09 +000057#define SRE_PY_MODULE "re"
58
Guido van Rossumb700df92000-03-31 14:59:30 +000059/* defining this one enables tracing */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000060#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
Fredrik Lundh80946112000-06-29 18:03:25 +000064#if defined(_MSC_VER)
Guido van Rossumb700df92000-03-31 14:59:30 +000065#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
Fredrik Lundh28552902000-07-05 21:14:16 +000066#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
Guido van Rossumb700df92000-03-31 14:59:30 +000067/* fastest possible local call under MSVC */
68#define LOCAL(type) static __inline type __fastcall
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000069#else
Benjamin Peterson791dc832017-04-20 23:52:19 -070070#define LOCAL(type) static inline type
Guido van Rossumb700df92000-03-31 14:59:30 +000071#endif
72
73/* error codes */
74#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000075#define SRE_ERROR_STATE -2 /* illegal state */
Fredrik Lundh96ab4652000-08-03 16:29:50 +000076#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
Guido van Rossumb700df92000-03-31 14:59:30 +000077#define SRE_ERROR_MEMORY -9 /* out of memory */
Christian Heimes2380ac72008-01-09 00:17:24 +000078#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000079
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000080#if defined(VERBOSE)
Guido van Rossumb700df92000-03-31 14:59:30 +000081#define TRACE(v) printf v
Guido van Rossumb700df92000-03-31 14:59:30 +000082#else
83#define TRACE(v)
84#endif
85
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000086/* -------------------------------------------------------------------- */
87/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000088
/* ASCII-only character predicates.  Apart from SRE_IS_LINEBREAK, each one
   rejects any code point >= 128 before consulting the Py_IS* classifier. */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
/* only '\n' counts as a line break here */
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
/* word characters are ASCII alphanumerics plus the underscore */
#define SRE_IS_WORD(ch) ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* Lower-case ch with Py_TOLOWER when it is below 128; larger values are
   returned unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
104
/* Upper-case ch with Py_TOUPPER when it is below 128; larger values are
   returned unchanged. */
static unsigned int sre_upper_ascii(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
109
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000110/* locale-specific character predicates */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000111/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
112 * warnings when c's type supports only numbers < N+1 */
Gustavo Niemeyer601b9632004-02-14 00:31:13 +0000113#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000114#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
115
/* Lower-case ch through the C library's tolower() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower((int)ch);
}
120
/* Upper-case ch through the C library's toupper() when it is below 256;
   larger values are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper((int)ch);
}
125
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000126/* unicode-specific character predicates */
127
Victor Stinner0058b862011-09-29 03:27:47 +0200128#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
129#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
130#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
131#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
132#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000133
/* Lower-case ch using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
138
/* Upper-case ch using Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int uppered = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return uppered;
}
143
Guido van Rossumb700df92000-03-31 14:59:30 +0000144LOCAL(int)
145sre_category(SRE_CODE category, unsigned int ch)
146{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000147 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000149 case SRE_CATEGORY_DIGIT:
150 return SRE_IS_DIGIT(ch);
151 case SRE_CATEGORY_NOT_DIGIT:
152 return !SRE_IS_DIGIT(ch);
153 case SRE_CATEGORY_SPACE:
154 return SRE_IS_SPACE(ch);
155 case SRE_CATEGORY_NOT_SPACE:
156 return !SRE_IS_SPACE(ch);
157 case SRE_CATEGORY_WORD:
158 return SRE_IS_WORD(ch);
159 case SRE_CATEGORY_NOT_WORD:
160 return !SRE_IS_WORD(ch);
161 case SRE_CATEGORY_LINEBREAK:
162 return SRE_IS_LINEBREAK(ch);
163 case SRE_CATEGORY_NOT_LINEBREAK:
164 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000165
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000166 case SRE_CATEGORY_LOC_WORD:
167 return SRE_LOC_IS_WORD(ch);
168 case SRE_CATEGORY_LOC_NOT_WORD:
169 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000170
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 case SRE_CATEGORY_UNI_DIGIT:
172 return SRE_UNI_IS_DIGIT(ch);
173 case SRE_CATEGORY_UNI_NOT_DIGIT:
174 return !SRE_UNI_IS_DIGIT(ch);
175 case SRE_CATEGORY_UNI_SPACE:
176 return SRE_UNI_IS_SPACE(ch);
177 case SRE_CATEGORY_UNI_NOT_SPACE:
178 return !SRE_UNI_IS_SPACE(ch);
179 case SRE_CATEGORY_UNI_WORD:
180 return SRE_UNI_IS_WORD(ch);
181 case SRE_CATEGORY_UNI_NOT_WORD:
182 return !SRE_UNI_IS_WORD(ch);
183 case SRE_CATEGORY_UNI_LINEBREAK:
184 return SRE_UNI_IS_LINEBREAK(ch);
185 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
186 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000187 }
188 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000189}
190
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300191LOCAL(int)
192char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
193{
194 return ch == pattern
195 || (SRE_CODE) sre_lower_locale(ch) == pattern
196 || (SRE_CODE) sre_upper_locale(ch) == pattern;
197}
198
199
Guido van Rossumb700df92000-03-31 14:59:30 +0000200/* helpers */
201
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000202static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000203data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000204{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000206 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000208 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000210}
211
212static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000213data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000214{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000215 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000216 minsize = state->data_stack_base+size;
217 cursize = state->data_stack_size;
218 if (cursize < minsize) {
219 void* stack;
220 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300221 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000222 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000223 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000224 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225 return SRE_ERROR_MEMORY;
226 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000227 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000228 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000230 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000231}
232
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000233/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000234
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300235#define SRE_CHAR Py_UCS1
236#define SIZEOF_SRE_CHAR 1
237#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300238#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000239
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300240/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000241
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300242#define SRE_CHAR Py_UCS2
243#define SIZEOF_SRE_CHAR 2
244#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300245#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000246
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300247/* generate 32-bit unicode version */
248
249#define SRE_CHAR Py_UCS4
250#define SIZEOF_SRE_CHAR 4
251#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300252#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000253
254/* -------------------------------------------------------------------- */
255/* factories and destructors */
256
257/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100258static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300259static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000260
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300261
262/*[clinic input]
263module _sre
264class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
265class _sre.SRE_Match "MatchObject *" "&Match_Type"
266class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
267[clinic start generated code]*/
268/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
269
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700270static PyTypeObject Pattern_Type;
271static PyTypeObject Match_Type;
272static PyTypeObject Scanner_Type;
273
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300274/*[clinic input]
275_sre.getcodesize -> int
276[clinic start generated code]*/
277
278static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300279_sre_getcodesize_impl(PyObject *module)
280/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000281{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300282 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000283}
284
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300285/*[clinic input]
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300286_sre.ascii_iscased -> bool
287
288 character: int
289 /
290
291[clinic start generated code]*/
292
293static int
294_sre_ascii_iscased_impl(PyObject *module, int character)
295/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
296{
297 unsigned int ch = (unsigned int)character;
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300298 return ch != sre_lower_ascii(ch) || ch != sre_upper_ascii(ch);
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300299}
300
301/*[clinic input]
302_sre.unicode_iscased -> bool
303
304 character: int
305 /
306
307[clinic start generated code]*/
308
309static int
310_sre_unicode_iscased_impl(PyObject *module, int character)
311/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
312{
313 unsigned int ch = (unsigned int)character;
314 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
315}
316
317/*[clinic input]
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300318_sre.ascii_tolower -> int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300319
320 character: int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300321 /
322
323[clinic start generated code]*/
324
325static int
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300326_sre_ascii_tolower_impl(PyObject *module, int character)
327/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000328{
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300329 return sre_lower_ascii(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000330}
331
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300332/*[clinic input]
333_sre.unicode_tolower -> int
334
335 character: int
336 /
337
338[clinic start generated code]*/
339
340static int
341_sre_unicode_tolower_impl(PyObject *module, int character)
342/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
343{
344 return sre_lower_unicode(character);
345}
346
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000347LOCAL(void)
348state_reset(SRE_STATE* state)
349{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000350 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000351 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000352
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000353 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000354 state->lastindex = -1;
355
356 state->repeat = NULL;
357
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000358 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000359}
360
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000361static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200362getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300363 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600364 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000365{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000366 /* given a python object, return a data pointer, a length (in
367 characters), and a character size. return NULL if the object
368 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000369
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000370 /* Unicode objects do not support the buffer API. So, get the data
371 directly instead. */
372 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200373 if (PyUnicode_READY(string) == -1)
374 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200376 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300377 *p_isbytes = 0;
378 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000379 }
380
Victor Stinner0058b862011-09-29 03:27:47 +0200381 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200383 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300384 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000386
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 *p_length = view->len;
388 *p_charsize = 1;
389 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000390
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300391 if (view->buf == NULL) {
392 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
393 PyBuffer_Release(view);
394 view->buf = NULL;
395 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000396 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300397 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000398}
399
400LOCAL(PyObject*)
401state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000402 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000403{
404 /* prepare state object */
405
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000406 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300407 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000408 void* ptr;
409
410 memset(state, 0, sizeof(SRE_STATE));
411
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300412 state->mark = PyMem_New(void *, pattern->groups * 2);
413 if (!state->mark) {
414 PyErr_NoMemory();
415 goto err;
416 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000417 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000418 state->lastindex = -1;
419
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300421 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000422 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600423 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000424
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300425 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600426 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200427 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600428 goto err;
429 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300430 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200432 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600433 goto err;
434 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000435
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000436 /* adjust boundaries */
437 if (start < 0)
438 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000439 else if (start > length)
440 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000441
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000442 if (end < 0)
443 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000444 else if (end > length)
445 end = length;
446
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300447 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000448 state->charsize = charsize;
Guido van Rossumb700df92000-03-31 14:59:30 +0000449
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000450 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000451
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000452 state->start = (void*) ((char*) ptr + start * state->charsize);
453 state->end = (void*) ((char*) ptr + end * state->charsize);
454
455 Py_INCREF(string);
456 state->string = string;
457 state->pos = start;
458 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000459
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000460 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600461 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300462 PyMem_Del(state->mark);
463 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600464 if (state->buffer.buf)
465 PyBuffer_Release(&state->buffer);
466 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000467}
468
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000469LOCAL(void)
470state_fini(SRE_STATE* state)
471{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600472 if (state->buffer.buf)
473 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000474 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000475 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300476 PyMem_Del(state->mark);
477 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000478}
479
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000480/* calculate offset from start of string */
481#define STATE_OFFSET(state, member)\
482 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
483
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000484LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300485getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300486 PyObject* string, Py_ssize_t start, Py_ssize_t end)
487{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300488 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300489 if (PyBytes_CheckExact(string) &&
490 start == 0 && end == PyBytes_GET_SIZE(string)) {
491 Py_INCREF(string);
492 return string;
493 }
494 return PyBytes_FromStringAndSize(
495 (const char *)ptr + start, end - start);
496 }
497 else {
498 return PyUnicode_Substring(string, start, end);
499 }
500}
501
502LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000503state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000504{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000505 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000506
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000507 index = (index - 1) * 2;
508
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000509 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000510 if (empty)
511 /* want empty string */
512 i = j = 0;
513 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200514 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000515 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000516 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000517 i = STATE_OFFSET(state, state->mark[index]);
518 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000519 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000520
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300521 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522}
523
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000524static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100525pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000526{
527 switch (status) {
528 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400529 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000530 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400531 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000532 "maximum recursion limit exceeded"
533 );
534 break;
535 case SRE_ERROR_MEMORY:
536 PyErr_NoMemory();
537 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000538 case SRE_ERROR_INTERRUPTED:
539 /* An exception has already been raised, so let it fly */
540 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000541 default:
542 /* other error codes indicate compiler/engine bugs */
543 PyErr_SetString(
544 PyExc_RuntimeError,
545 "internal error in regular expression engine"
546 );
547 }
548}
549
Guido van Rossumb700df92000-03-31 14:59:30 +0000550static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000551pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000552{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000553 if (self->weakreflist != NULL)
554 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000555 Py_XDECREF(self->pattern);
556 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000557 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000558 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000559}
560
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300561LOCAL(Py_ssize_t)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300562sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300563{
564 if (state->charsize == 1)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300565 return sre_ucs1_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300566 if (state->charsize == 2)
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300567 return sre_ucs2_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300568 assert(state->charsize == 4);
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300569 return sre_ucs4_match(state, pattern, match_all);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300570}
571
572LOCAL(Py_ssize_t)
573sre_search(SRE_STATE* state, SRE_CODE* pattern)
574{
575 if (state->charsize == 1)
576 return sre_ucs1_search(state, pattern);
577 if (state->charsize == 2)
578 return sre_ucs2_search(state, pattern);
579 assert(state->charsize == 4);
580 return sre_ucs4_search(state, pattern);
581}
582
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300583/*[clinic input]
584_sre.SRE_Pattern.match
585
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200586 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300587 pos: Py_ssize_t = 0
588 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300589
590Matches zero or more characters at the beginning of the string.
591[clinic start generated code]*/
592
Larry Hastings16c51912014-01-07 11:53:01 -0800593static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300594_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200595 Py_ssize_t pos, Py_ssize_t endpos)
596/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800597{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000598 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100599 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300600 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000601
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300602 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000603 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000604
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000605 state.ptr = state.start;
606
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000607 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
608
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300609 status = sre_match(&state, PatternObject_GetCode(self), 0);
Guido van Rossumb700df92000-03-31 14:59:30 +0000610
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300612 if (PyErr_Occurred()) {
613 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000614 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300615 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000616
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300617 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000618 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300619 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000620}
621
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300622/*[clinic input]
623_sre.SRE_Pattern.fullmatch
624
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200625 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300626 pos: Py_ssize_t = 0
627 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300628
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300629Matches against all of the string.
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300630[clinic start generated code]*/
631
632static PyObject *
633_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200634 Py_ssize_t pos, Py_ssize_t endpos)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300635/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200636{
637 SRE_STATE state;
638 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300639 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200640
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300641 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200642 return NULL;
643
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644 state.ptr = state.start;
645
646 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
647
Serhiy Storchaka429b59e2014-05-14 21:48:17 +0300648 status = sre_match(&state, PatternObject_GetCode(self), 1);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200649
650 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300651 if (PyErr_Occurred()) {
652 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200653 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300654 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200655
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300656 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200657 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300658 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200659}
660
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300661/*[clinic input]
662_sre.SRE_Pattern.search
663
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200664 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300665 pos: Py_ssize_t = 0
666 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300667
668Scan through string looking for a match, and return a corresponding match object instance.
669
670Return None if no position in the string matches.
671[clinic start generated code]*/
672
673static PyObject *
674_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200675 Py_ssize_t pos, Py_ssize_t endpos)
676/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000677{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000678 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100679 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300680 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000681
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300682 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000683 return NULL;
684
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
686
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300687 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000688
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000689 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
690
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300691 if (PyErr_Occurred()) {
692 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000693 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300694 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000695
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300696 match = pattern_new_match(self, &state, status);
697 state_fini(&state);
698 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000699}
700
701static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200702call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000703{
704 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000705 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000706 PyObject* func;
707 PyObject* result;
708
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000709 if (!args)
710 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000711 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000712 if (!name)
713 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000714 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000715 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000716 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000717 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000718 func = PyObject_GetAttrString(mod, function);
719 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000720 if (!func)
721 return NULL;
722 result = PyObject_CallObject(func, args);
723 Py_DECREF(func);
724 Py_DECREF(args);
725 return result;
726}
727
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300728/*[clinic input]
729_sre.SRE_Pattern.findall
730
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200731 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300732 pos: Py_ssize_t = 0
733 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300734
735Return a list of all non-overlapping matches of pattern in string.
736[clinic start generated code]*/
737
738static PyObject *
739_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200740 Py_ssize_t pos, Py_ssize_t endpos)
741/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000742{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000743 SRE_STATE state;
744 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100745 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000746 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000747
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300748 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000749 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000750
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000751 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000752 if (!list) {
753 state_fini(&state);
754 return NULL;
755 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000756
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000757 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000760
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000761 state_reset(&state);
762
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000763 state.ptr = state.start;
764
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300765 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300766 if (PyErr_Occurred())
767 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000768
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000769 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000770 if (status == 0)
771 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000772 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000773 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000774 }
Tim Peters3d563502006-01-21 02:47:53 +0000775
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000776 /* don't bother to build a match object */
777 switch (self->groups) {
778 case 0:
779 b = STATE_OFFSET(&state, state.start);
780 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300781 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300782 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000783 if (!item)
784 goto error;
785 break;
786 case 1:
787 item = state_getslice(&state, 1, string, 1);
788 if (!item)
789 goto error;
790 break;
791 default:
792 item = PyTuple_New(self->groups);
793 if (!item)
794 goto error;
795 for (i = 0; i < self->groups; i++) {
796 PyObject* o = state_getslice(&state, i+1, string, 1);
797 if (!o) {
798 Py_DECREF(item);
799 goto error;
800 }
801 PyTuple_SET_ITEM(item, i, o);
802 }
803 break;
804 }
805
806 status = PyList_Append(list, item);
807 Py_DECREF(item);
808 if (status < 0)
809 goto error;
810
811 if (state.ptr == state.start)
812 state.start = (void*) ((char*) state.ptr + state.charsize);
813 else
814 state.start = state.ptr;
815
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000817
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000818 state_fini(&state);
819 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000820
821error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000822 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000823 state_fini(&state);
824 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000825
Guido van Rossumb700df92000-03-31 14:59:30 +0000826}
827
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300828/*[clinic input]
829_sre.SRE_Pattern.finditer
830
831 string: object
832 pos: Py_ssize_t = 0
833 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
834
835Return an iterator over all non-overlapping matches for the RE pattern in string.
836
837For each match, the iterator returns a match object.
838[clinic start generated code]*/
839
840static PyObject *
841_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
842 Py_ssize_t pos, Py_ssize_t endpos)
843/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000844{
845 PyObject* scanner;
846 PyObject* search;
847 PyObject* iterator;
848
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300849 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000850 if (!scanner)
851 return NULL;
852
853 search = PyObject_GetAttrString(scanner, "search");
854 Py_DECREF(scanner);
855 if (!search)
856 return NULL;
857
858 iterator = PyCallIter_New(search, Py_None);
859 Py_DECREF(search);
860
861 return iterator;
862}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000863
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300864/*[clinic input]
865_sre.SRE_Pattern.scanner
866
867 string: object
868 pos: Py_ssize_t = 0
869 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
870
871[clinic start generated code]*/
872
873static PyObject *
874_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
875 Py_ssize_t pos, Py_ssize_t endpos)
876/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
877{
878 return pattern_scanner(self, string, pos, endpos);
879}
880
881/*[clinic input]
882_sre.SRE_Pattern.split
883
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200884 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300885 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300886
887Split string by the occurrences of pattern.
888[clinic start generated code]*/
889
890static PyObject *
891_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200892 Py_ssize_t maxsplit)
893/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000894{
895 SRE_STATE state;
896 PyObject* list;
897 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100898 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000899 Py_ssize_t n;
900 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000901 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000902
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200903 assert(self->codesize != 0);
904 if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
905 if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
906 PyErr_SetString(PyExc_ValueError,
907 "split() requires a non-empty pattern match.");
908 return NULL;
909 }
910 if (PyErr_WarnEx(PyExc_FutureWarning,
911 "split() requires a non-empty pattern match.",
912 1) < 0)
913 return NULL;
914 }
915
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300916 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000917 return NULL;
918
919 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000920 if (!list) {
921 state_fini(&state);
922 return NULL;
923 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000924
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000925 n = 0;
926 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000927
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000928 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000929
930 state_reset(&state);
931
932 state.ptr = state.start;
933
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300934 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300935 if (PyErr_Occurred())
936 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000937
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000938 if (status <= 0) {
939 if (status == 0)
940 break;
941 pattern_error(status);
942 goto error;
943 }
Tim Peters3d563502006-01-21 02:47:53 +0000944
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000945 if (state.start == state.ptr) {
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +0300946 if (last == state.end || state.ptr == state.end)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000947 break;
948 /* skip one character */
949 state.start = (void*) ((char*) state.ptr + state.charsize);
950 continue;
951 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000952
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000953 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300954 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000955 string, STATE_OFFSET(&state, last),
956 STATE_OFFSET(&state, state.start)
957 );
958 if (!item)
959 goto error;
960 status = PyList_Append(list, item);
961 Py_DECREF(item);
962 if (status < 0)
963 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000964
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000965 /* add groups (if any) */
966 for (i = 0; i < self->groups; i++) {
967 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000968 if (!item)
969 goto error;
970 status = PyList_Append(list, item);
971 Py_DECREF(item);
972 if (status < 0)
973 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000974 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000975
976 n = n + 1;
977
978 last = state.start = state.ptr;
979
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000980 }
981
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000982 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300983 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000984 string, STATE_OFFSET(&state, last), state.endpos
985 );
986 if (!item)
987 goto error;
988 status = PyList_Append(list, item);
989 Py_DECREF(item);
990 if (status < 0)
991 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000992
993 state_fini(&state);
994 return list;
995
996error:
997 Py_DECREF(list);
998 state_fini(&state);
999 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001000
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001001}
Fredrik Lundh971e78b2001-10-20 17:48:46 +00001002
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001003static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001004pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001005 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001006{
1007 SRE_STATE state;
1008 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001009 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001010 PyObject* item;
1011 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001012 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001013 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01001014 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001015 Py_ssize_t n;
1016 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001017 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001018 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001019 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001020
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001021 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001022 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001023 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001024 Py_INCREF(filter);
1025 filter_is_callable = 1;
1026 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001027 /* if not callable, check if it's a literal string */
1028 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001029 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001030 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001031 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001032 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001033 if (charsize == 1)
1034 literal = memchr(ptr, '\\', n) == NULL;
1035 else
1036 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001037 } else {
1038 PyErr_Clear();
1039 literal = 0;
1040 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001041 if (view.buf)
1042 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001043 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001044 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001045 Py_INCREF(filter);
1046 filter_is_callable = 0;
1047 } else {
1048 /* not a literal; hand it over to the template compiler */
1049 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001050 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001051 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001052 );
1053 if (!filter)
1054 return NULL;
1055 filter_is_callable = PyCallable_Check(filter);
1056 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001057 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001058
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001059 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001060 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001061 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001062 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001063
1064 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001065 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001066 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001067 state_fini(&state);
1068 return NULL;
1069 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001070
1071 n = i = 0;
1072
1073 while (!count || n < count) {
1074
1075 state_reset(&state);
1076
1077 state.ptr = state.start;
1078
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001079 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001080 if (PyErr_Occurred())
1081 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001082
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001083 if (status <= 0) {
1084 if (status == 0)
1085 break;
1086 pattern_error(status);
1087 goto error;
1088 }
Tim Peters3d563502006-01-21 02:47:53 +00001089
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001090 b = STATE_OFFSET(&state, state.start);
1091 e = STATE_OFFSET(&state, state.ptr);
1092
1093 if (i < b) {
1094 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001095 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001096 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001097 if (!item)
1098 goto error;
1099 status = PyList_Append(list, item);
1100 Py_DECREF(item);
1101 if (status < 0)
1102 goto error;
1103
1104 } else if (i == b && i == e && n > 0)
1105 /* ignore empty match on latest position */
1106 goto next;
1107
1108 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001109 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110 match = pattern_new_match(self, &state, 1);
1111 if (!match)
1112 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001113 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001114 Py_DECREF(match);
1115 if (!item)
1116 goto error;
1117 } else {
1118 /* filter is literal string */
1119 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001120 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001121 }
1122
1123 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001124 if (item != Py_None) {
1125 status = PyList_Append(list, item);
1126 Py_DECREF(item);
1127 if (status < 0)
1128 goto error;
1129 }
Tim Peters3d563502006-01-21 02:47:53 +00001130
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001131 i = e;
1132 n = n + 1;
1133
1134next:
1135 /* move on */
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03001136 if (state.ptr == state.end)
1137 break;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001138 if (state.ptr == state.start)
1139 state.start = (void*) ((char*) state.ptr + state.charsize);
1140 else
1141 state.start = state.ptr;
1142
1143 }
1144
1145 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001146 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001147 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001148 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001149 if (!item)
1150 goto error;
1151 status = PyList_Append(list, item);
1152 Py_DECREF(item);
1153 if (status < 0)
1154 goto error;
1155 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001156
1157 state_fini(&state);
1158
Guido van Rossum4e173842001-12-07 04:25:10 +00001159 Py_DECREF(filter);
1160
Fredrik Lundhdac58492001-10-21 21:48:30 +00001161 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001162 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001163 if (!joiner) {
1164 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001165 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001166 }
1167 if (PyList_GET_SIZE(list) == 0) {
1168 Py_DECREF(list);
1169 item = joiner;
1170 }
1171 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001172 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001173 item = _PyBytes_Join(joiner, list);
1174 else
1175 item = PyUnicode_Join(joiner, list);
1176 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001177 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001178 if (!item)
1179 return NULL;
1180 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001181
1182 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001183 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001184
1185 return item;
1186
1187error:
1188 Py_DECREF(list);
1189 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001190 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001191 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001192
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001193}
1194
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001195/*[clinic input]
1196_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001197
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001198 repl: object
1199 string: object
1200 count: Py_ssize_t = 0
1201
1202Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1203[clinic start generated code]*/
1204
1205static PyObject *
1206_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1207 PyObject *string, Py_ssize_t count)
1208/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1209{
1210 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001211}
1212
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001213/*[clinic input]
1214_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001215
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001216 repl: object
1217 string: object
1218 count: Py_ssize_t = 0
1219
1220Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1221[clinic start generated code]*/
1222
1223static PyObject *
1224_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1225 PyObject *string, Py_ssize_t count)
1226/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1227{
1228 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001229}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001230
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001231/*[clinic input]
1232_sre.SRE_Pattern.__copy__
1233
1234[clinic start generated code]*/
1235
1236static PyObject *
1237_sre_SRE_Pattern___copy___impl(PatternObject *self)
1238/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001239{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001240 Py_INCREF(self);
1241 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001242}
1243
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001244/*[clinic input]
1245_sre.SRE_Pattern.__deepcopy__
1246
1247 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001248 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001249
1250[clinic start generated code]*/
1251
1252static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001253_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1254/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001255{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001256 Py_INCREF(self);
1257 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001258}
1259
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001260static PyObject *
1261pattern_repr(PatternObject *obj)
1262{
1263 static const struct {
1264 const char *name;
1265 int value;
1266 } flag_names[] = {
1267 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1268 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1269 {"re.LOCALE", SRE_FLAG_LOCALE},
1270 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1271 {"re.DOTALL", SRE_FLAG_DOTALL},
1272 {"re.UNICODE", SRE_FLAG_UNICODE},
1273 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1274 {"re.DEBUG", SRE_FLAG_DEBUG},
1275 {"re.ASCII", SRE_FLAG_ASCII},
1276 };
1277 PyObject *result = NULL;
1278 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001279 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001280 int flags = obj->flags;
1281
1282 /* Omit re.UNICODE for valid string patterns. */
1283 if (obj->isbytes == 0 &&
1284 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1285 SRE_FLAG_UNICODE)
1286 flags &= ~SRE_FLAG_UNICODE;
1287
1288 flag_items = PyList_New(0);
1289 if (!flag_items)
1290 return NULL;
1291
1292 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1293 if (flags & flag_names[i].value) {
1294 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1295 if (!item)
1296 goto done;
1297
1298 if (PyList_Append(flag_items, item) < 0) {
1299 Py_DECREF(item);
1300 goto done;
1301 }
1302 Py_DECREF(item);
1303 flags &= ~flag_names[i].value;
1304 }
1305 }
1306 if (flags) {
1307 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1308 if (!item)
1309 goto done;
1310
1311 if (PyList_Append(flag_items, item) < 0) {
1312 Py_DECREF(item);
1313 goto done;
1314 }
1315 Py_DECREF(item);
1316 }
1317
1318 if (PyList_Size(flag_items) > 0) {
1319 PyObject *flags_result;
1320 PyObject *sep = PyUnicode_FromString("|");
1321 if (!sep)
1322 goto done;
1323 flags_result = PyUnicode_Join(sep, flag_items);
1324 Py_DECREF(sep);
1325 if (!flags_result)
1326 goto done;
1327 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1328 obj->pattern, flags_result);
1329 Py_DECREF(flags_result);
1330 }
1331 else {
1332 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1333 }
1334
1335done:
1336 Py_DECREF(flag_items);
1337 return result;
1338}
1339
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03001340PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001341
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001342/* PatternObject's 'groupindex' method. */
1343static PyObject *
1344pattern_groupindex(PatternObject *self)
1345{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001346 if (self->groupindex == NULL)
1347 return PyDict_New();
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001348 return PyDictProxy_New(self->groupindex);
1349}
1350
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001351static int _validate(PatternObject *self); /* Forward */
1352
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001353/*[clinic input]
1354_sre.compile
1355
1356 pattern: object
1357 flags: int
1358 code: object(subclass_of='&PyList_Type')
1359 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001360 groupindex: object(subclass_of='&PyDict_Type')
1361 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001362
1363[clinic start generated code]*/
1364
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001365static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001366_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001367 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1368 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001369/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001370{
1371 /* "compile" pattern descriptor to pattern object */
1372
1373 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001374 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001375
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001376 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001377 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001378 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1379 if (!self)
1380 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001381 self->weakreflist = NULL;
1382 self->pattern = NULL;
1383 self->groupindex = NULL;
1384 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001385
1386 self->codesize = n;
1387
1388 for (i = 0; i < n; i++) {
1389 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001390 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001391 self->code[i] = (SRE_CODE) value;
1392 if ((unsigned long) self->code[i] != value) {
1393 PyErr_SetString(PyExc_OverflowError,
1394 "regular expression code size limit exceeded");
1395 break;
1396 }
1397 }
1398
1399 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001400 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001401 return NULL;
1402 }
1403
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001404 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001405 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001406 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001407 else {
1408 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001409 int charsize;
1410 Py_buffer view;
1411 view.buf = NULL;
1412 if (!getstring(pattern, &p_length, &self->isbytes,
1413 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001414 Py_DECREF(self);
1415 return NULL;
1416 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001417 if (view.buf)
1418 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001419 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001420
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001421 Py_INCREF(pattern);
1422 self->pattern = pattern;
1423
1424 self->flags = flags;
1425
1426 self->groups = groups;
1427
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001428 if (PyDict_GET_SIZE(groupindex) > 0) {
1429 Py_INCREF(groupindex);
1430 self->groupindex = groupindex;
1431 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1432 Py_INCREF(indexgroup);
1433 self->indexgroup = indexgroup;
1434 }
1435 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001436
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001437 if (!_validate(self)) {
1438 Py_DECREF(self);
1439 return NULL;
1440 }
1441
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001442 return (PyObject*) self;
1443}
1444
Guido van Rossumb700df92000-03-31 14:59:30 +00001445/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001446/* Code validation */
1447
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1469
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete,
   parenthesized printf argument list, e.g. VTRACE(("x=%d\n", x)). */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0) /* do nothing */
#endif

/* Report failure: trace the source line and make the *enclosing function*
   return 0.  Only usable inside functions returning int. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the function that uses them:
   `code` (read cursor, advanced by one word), `end` (one past the last
   readable word) and the destination `op`, `arg` or `skip`.  Each of them
   FAILs when the cursor has already reached `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL if skip-adj exceeds the number of words left
   between the skip word itself and `end`.  Note that the bound is checked
   before the cursor is advanced, and that no lower bound is enforced:
   callers must reject skips that are too small for their own layout. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))         \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001510
1511static int
1512_validate_charset(SRE_CODE *code, SRE_CODE *end)
1513{
1514 /* Some variables are manipulated by the macros above */
1515 SRE_CODE op;
1516 SRE_CODE arg;
1517 SRE_CODE offset;
1518 int i;
1519
1520 while (code < end) {
1521 GET_OP;
1522 switch (op) {
1523
1524 case SRE_OP_NEGATE:
1525 break;
1526
1527 case SRE_OP_LITERAL:
1528 GET_ARG;
1529 break;
1530
1531 case SRE_OP_RANGE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001532 case SRE_OP_RANGE_UNI_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001533 GET_ARG;
1534 GET_ARG;
1535 break;
1536
1537 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001538 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001539 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001540 FAIL;
1541 code += offset;
1542 break;
1543
1544 case SRE_OP_BIGCHARSET:
1545 GET_ARG; /* Number of blocks */
1546 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001547 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001548 FAIL;
1549 /* Make sure that each byte points to a valid block */
1550 for (i = 0; i < 256; i++) {
1551 if (((unsigned char *)code)[i] >= arg)
1552 FAIL;
1553 }
1554 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001555 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001556 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001557 FAIL;
1558 code += offset;
1559 break;
1560
1561 case SRE_OP_CATEGORY:
1562 GET_ARG;
1563 switch (arg) {
1564 case SRE_CATEGORY_DIGIT:
1565 case SRE_CATEGORY_NOT_DIGIT:
1566 case SRE_CATEGORY_SPACE:
1567 case SRE_CATEGORY_NOT_SPACE:
1568 case SRE_CATEGORY_WORD:
1569 case SRE_CATEGORY_NOT_WORD:
1570 case SRE_CATEGORY_LINEBREAK:
1571 case SRE_CATEGORY_NOT_LINEBREAK:
1572 case SRE_CATEGORY_LOC_WORD:
1573 case SRE_CATEGORY_LOC_NOT_WORD:
1574 case SRE_CATEGORY_UNI_DIGIT:
1575 case SRE_CATEGORY_UNI_NOT_DIGIT:
1576 case SRE_CATEGORY_UNI_SPACE:
1577 case SRE_CATEGORY_UNI_NOT_SPACE:
1578 case SRE_CATEGORY_UNI_WORD:
1579 case SRE_CATEGORY_UNI_NOT_WORD:
1580 case SRE_CATEGORY_UNI_LINEBREAK:
1581 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1582 break;
1583 default:
1584 FAIL;
1585 }
1586 break;
1587
1588 default:
1589 FAIL;
1590
1591 }
1592 }
1593
1594 return 1;
1595}
1596
1597static int
1598_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1599{
1600 /* Some variables are manipulated by the macros above */
1601 SRE_CODE op;
1602 SRE_CODE arg;
1603 SRE_CODE skip;
1604
1605 VTRACE(("code=%p, end=%p\n", code, end));
1606
1607 if (code > end)
1608 FAIL;
1609
1610 while (code < end) {
1611 GET_OP;
1612 switch (op) {
1613
1614 case SRE_OP_MARK:
1615 /* We don't check whether marks are properly nested; the
1616 sre_match() code is robust even if they don't, and the worst
1617 you can get is nonsensical match results. */
1618 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001619 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001620 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1621 FAIL;
1622 }
1623 break;
1624
1625 case SRE_OP_LITERAL:
1626 case SRE_OP_NOT_LITERAL:
1627 case SRE_OP_LITERAL_IGNORE:
1628 case SRE_OP_NOT_LITERAL_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001629 case SRE_OP_LITERAL_UNI_IGNORE:
1630 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001631 case SRE_OP_LITERAL_LOC_IGNORE:
1632 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001633 GET_ARG;
1634 /* The arg is just a character, nothing to check */
1635 break;
1636
1637 case SRE_OP_SUCCESS:
1638 case SRE_OP_FAILURE:
1639 /* Nothing to check; these normally end the matching process */
1640 break;
1641
1642 case SRE_OP_AT:
1643 GET_ARG;
1644 switch (arg) {
1645 case SRE_AT_BEGINNING:
1646 case SRE_AT_BEGINNING_STRING:
1647 case SRE_AT_BEGINNING_LINE:
1648 case SRE_AT_END:
1649 case SRE_AT_END_LINE:
1650 case SRE_AT_END_STRING:
1651 case SRE_AT_BOUNDARY:
1652 case SRE_AT_NON_BOUNDARY:
1653 case SRE_AT_LOC_BOUNDARY:
1654 case SRE_AT_LOC_NON_BOUNDARY:
1655 case SRE_AT_UNI_BOUNDARY:
1656 case SRE_AT_UNI_NON_BOUNDARY:
1657 break;
1658 default:
1659 FAIL;
1660 }
1661 break;
1662
1663 case SRE_OP_ANY:
1664 case SRE_OP_ANY_ALL:
1665 /* These have no operands */
1666 break;
1667
1668 case SRE_OP_IN:
1669 case SRE_OP_IN_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001670 case SRE_OP_IN_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001671 case SRE_OP_IN_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001672 GET_SKIP;
1673 /* Stop 1 before the end; we check the FAILURE below */
1674 if (!_validate_charset(code, code+skip-2))
1675 FAIL;
1676 if (code[skip-2] != SRE_OP_FAILURE)
1677 FAIL;
1678 code += skip-1;
1679 break;
1680
1681 case SRE_OP_INFO:
1682 {
1683 /* A minimal info field is
1684 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1685 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1686 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001687 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001688 SRE_CODE *newcode;
1689 GET_SKIP;
1690 newcode = code+skip-1;
1691 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001692 GET_ARG;
1693 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001694 /* Check that only valid flags are present */
1695 if ((flags & ~(SRE_INFO_PREFIX |
1696 SRE_INFO_LITERAL |
1697 SRE_INFO_CHARSET)) != 0)
1698 FAIL;
1699 /* PREFIX and CHARSET are mutually exclusive */
1700 if ((flags & SRE_INFO_PREFIX) &&
1701 (flags & SRE_INFO_CHARSET))
1702 FAIL;
1703 /* LITERAL implies PREFIX */
1704 if ((flags & SRE_INFO_LITERAL) &&
1705 !(flags & SRE_INFO_PREFIX))
1706 FAIL;
1707 /* Validate the prefix */
1708 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001709 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001710 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001711 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001712 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001713 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001714 FAIL;
1715 code += prefix_len;
1716 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001717 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001718 FAIL;
1719 /* Each overlap value should be < prefix_len */
1720 for (i = 0; i < prefix_len; i++) {
1721 if (code[i] >= prefix_len)
1722 FAIL;
1723 }
1724 code += prefix_len;
1725 }
1726 /* Validate the charset */
1727 if (flags & SRE_INFO_CHARSET) {
1728 if (!_validate_charset(code, newcode-1))
1729 FAIL;
1730 if (newcode[-1] != SRE_OP_FAILURE)
1731 FAIL;
1732 code = newcode;
1733 }
1734 else if (code != newcode) {
1735 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1736 FAIL;
1737 }
1738 }
1739 break;
1740
1741 case SRE_OP_BRANCH:
1742 {
1743 SRE_CODE *target = NULL;
1744 for (;;) {
1745 GET_SKIP;
1746 if (skip == 0)
1747 break;
1748 /* Stop 2 before the end; we check the JUMP below */
1749 if (!_validate_inner(code, code+skip-3, groups))
1750 FAIL;
1751 code += skip-3;
1752 /* Check that it ends with a JUMP, and that each JUMP
1753 has the same target */
1754 GET_OP;
1755 if (op != SRE_OP_JUMP)
1756 FAIL;
1757 GET_SKIP;
1758 if (target == NULL)
1759 target = code+skip-1;
1760 else if (code+skip-1 != target)
1761 FAIL;
1762 }
1763 }
1764 break;
1765
1766 case SRE_OP_REPEAT_ONE:
1767 case SRE_OP_MIN_REPEAT_ONE:
1768 {
1769 SRE_CODE min, max;
1770 GET_SKIP;
1771 GET_ARG; min = arg;
1772 GET_ARG; max = arg;
1773 if (min > max)
1774 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001775 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001776 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001777 if (!_validate_inner(code, code+skip-4, groups))
1778 FAIL;
1779 code += skip-4;
1780 GET_OP;
1781 if (op != SRE_OP_SUCCESS)
1782 FAIL;
1783 }
1784 break;
1785
1786 case SRE_OP_REPEAT:
1787 {
1788 SRE_CODE min, max;
1789 GET_SKIP;
1790 GET_ARG; min = arg;
1791 GET_ARG; max = arg;
1792 if (min > max)
1793 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001794 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001795 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001796 if (!_validate_inner(code, code+skip-3, groups))
1797 FAIL;
1798 code += skip-3;
1799 GET_OP;
1800 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1801 FAIL;
1802 }
1803 break;
1804
1805 case SRE_OP_GROUPREF:
1806 case SRE_OP_GROUPREF_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001807 case SRE_OP_GROUPREF_UNI_IGNORE:
1808 case SRE_OP_GROUPREF_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001809 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001810 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001811 FAIL;
1812 break;
1813
1814 case SRE_OP_GROUPREF_EXISTS:
1815 /* The regex syntax for this is: '(?(group)then|else)', where
1816 'group' is either an integer group number or a group name,
1817 'then' and 'else' are sub-regexes, and 'else' is optional. */
1818 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001819 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001820 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001821 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001822 code--; /* The skip is relative to the first arg! */
1823 /* There are two possibilities here: if there is both a 'then'
1824 part and an 'else' part, the generated code looks like:
1825
1826 GROUPREF_EXISTS
1827 <group>
1828 <skipyes>
1829 ...then part...
1830 JUMP
1831 <skipno>
1832 (<skipyes> jumps here)
1833 ...else part...
1834 (<skipno> jumps here)
1835
1836 If there is only a 'then' part, it looks like:
1837
1838 GROUPREF_EXISTS
1839 <group>
1840 <skip>
1841 ...then part...
1842 (<skip> jumps here)
1843
1844 There is no direct way to decide which it is, and we don't want
1845 to allow arbitrary jumps anywhere in the code; so we just look
1846 for a JUMP opcode preceding our skip target.
1847 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001848 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001849 code[skip-3] == SRE_OP_JUMP)
1850 {
1851 VTRACE(("both then and else parts present\n"));
1852 if (!_validate_inner(code+1, code+skip-3, groups))
1853 FAIL;
1854 code += skip-2; /* Position after JUMP, at <skipno> */
1855 GET_SKIP;
1856 if (!_validate_inner(code, code+skip-1, groups))
1857 FAIL;
1858 code += skip-1;
1859 }
1860 else {
1861 VTRACE(("only a then part present\n"));
1862 if (!_validate_inner(code+1, code+skip-1, groups))
1863 FAIL;
1864 code += skip-1;
1865 }
1866 break;
1867
1868 case SRE_OP_ASSERT:
1869 case SRE_OP_ASSERT_NOT:
1870 GET_SKIP;
1871 GET_ARG; /* 0 for lookahead, width for lookbehind */
1872 code--; /* Back up over arg to simplify math below */
1873 if (arg & 0x80000000)
1874 FAIL; /* Width too large */
1875 /* Stop 1 before the end; we check the SUCCESS below */
1876 if (!_validate_inner(code+1, code+skip-2, groups))
1877 FAIL;
1878 code += skip-2;
1879 GET_OP;
1880 if (op != SRE_OP_SUCCESS)
1881 FAIL;
1882 break;
1883
1884 default:
1885 FAIL;
1886
1887 }
1888 }
1889
1890 VTRACE(("okay\n"));
1891 return 1;
1892}
1893
1894static int
1895_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1896{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001897 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1898 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001899 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001900 return _validate_inner(code, end-1, groups);
1901}
1902
1903static int
1904_validate(PatternObject *self)
1905{
1906 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1907 {
1908 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1909 return 0;
1910 }
1911 else
1912 VTRACE(("Success!\n"));
1913 return 1;
1914}
1915
1916/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001917/* match methods */
1918
static void
match_dealloc(MatchObject* self)
{
    /* Release the references held by a match object, then its memory.
       regs and string are released with Py_XDECREF because they may be
       NULL; pattern is released with Py_DECREF. */
    Py_XDECREF(self->regs);
    Py_XDECREF(self->string);
    Py_DECREF(self->pattern);
    PyObject_DEL(self);
}
1927
1928static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001929match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001930{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001931 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001932 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001933 Py_buffer view;
1934 PyObject *result;
1935 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001936 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001937
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001938 if (index < 0 || index >= self->groups) {
1939 /* raise IndexError if we were given a bad group number */
1940 PyErr_SetString(
1941 PyExc_IndexError,
1942 "no such group"
1943 );
1944 return NULL;
1945 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001946
Fredrik Lundh6f013982000-07-03 18:44:21 +00001947 index *= 2;
1948
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001949 if (self->string == Py_None || self->mark[index] < 0) {
1950 /* return default value if the string or group is undefined */
1951 Py_INCREF(def);
1952 return def;
1953 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001954
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001955 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001956 if (ptr == NULL)
1957 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001958
1959 i = self->mark[index];
1960 j = self->mark[index+1];
1961 i = Py_MIN(i, length);
1962 j = Py_MIN(j, length);
1963 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001964 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001965 PyBuffer_Release(&view);
1966 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001967}
1968
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001969static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001970match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001971{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001972 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001973
Guido van Rossumddefaf32007-01-14 03:31:43 +00001974 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001975 /* Default value */
1976 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001977
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001978 if (PyIndex_Check(index)) {
1979 return PyNumber_AsSsize_t(index, NULL);
1980 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001981
Fredrik Lundh6f013982000-07-03 18:44:21 +00001982 i = -1;
1983
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001984 if (self->pattern->groupindex) {
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001985 index = PyDict_GetItem(self->pattern->groupindex, index);
1986 if (index && PyLong_Check(index)) {
1987 i = PyLong_AsSsize_t(index);
1988 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001989 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001990
1991 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001992}
1993
1994static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001995match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001996{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001997 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001998}
1999
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002000/*[clinic input]
2001_sre.SRE_Match.expand
2002
2003 template: object
2004
2005Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2006[clinic start generated code]*/
2007
2008static PyObject *
2009_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2010/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002011{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002012 /* delegate to Python code */
2013 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00002014 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002015 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00002016 );
2017}
2018
2019static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002020match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00002021{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002022 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002023 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00002024
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002025 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00002026
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002027 switch (size) {
2028 case 0:
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002029 result = match_getslice(self, _PyLong_Zero, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002030 break;
2031 case 1:
2032 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2033 break;
2034 default:
2035 /* fetch multiple items */
2036 result = PyTuple_New(size);
2037 if (!result)
2038 return NULL;
2039 for (i = 0; i < size; i++) {
2040 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002041 self, PyTuple_GET_ITEM(args, i), Py_None
2042 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002043 if (!item) {
2044 Py_DECREF(result);
2045 return NULL;
2046 }
2047 PyTuple_SET_ITEM(result, i, item);
2048 }
2049 break;
2050 }
2051 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002052}
2053
static PyObject*
match_getitem(MatchObject* self, PyObject* name)
{
    /* Look up one group by number or name, using None as the default
       for a group that did not participate in the match. */
    return match_getslice(self, name, Py_None);
}
2059
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002060/*[clinic input]
2061_sre.SRE_Match.groups
2062
2063 default: object = None
2064 Is used for groups that did not participate in the match.
2065
2066Return a tuple containing all the subgroups of the match, from 1.
2067[clinic start generated code]*/
2068
2069static PyObject *
2070_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2071/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002072{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002073 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002074 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002075
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002076 result = PyTuple_New(self->groups-1);
2077 if (!result)
2078 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002079
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002080 for (index = 1; index < self->groups; index++) {
2081 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002082 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002083 if (!item) {
2084 Py_DECREF(result);
2085 return NULL;
2086 }
2087 PyTuple_SET_ITEM(result, index-1, item);
2088 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002089
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002090 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002091}
2092
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002093/*[clinic input]
2094_sre.SRE_Match.groupdict
2095
2096 default: object = None
2097 Is used for groups that did not participate in the match.
2098
2099Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2100[clinic start generated code]*/
2101
2102static PyObject *
2103_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2104/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002105{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002106 PyObject *result;
2107 PyObject *key;
2108 PyObject *value;
2109 Py_ssize_t pos = 0;
2110 Py_hash_t hash;
Guido van Rossumb700df92000-03-31 14:59:30 +00002111
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002112 result = PyDict_New();
2113 if (!result || !self->pattern->groupindex)
2114 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002115
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002116 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002117 int status;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002118 Py_INCREF(key);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002119 value = match_getslice(self, key, default_value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002120 if (!value) {
2121 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002122 goto failed;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002123 }
2124 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002125 Py_DECREF(value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002126 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002127 if (status < 0)
2128 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002129 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002130
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002131 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002132
2133failed:
Fredrik Lundh770617b2001-01-14 15:06:11 +00002134 Py_DECREF(result);
2135 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002136}
2137
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002138/*[clinic input]
2139_sre.SRE_Match.start -> Py_ssize_t
2140
2141 group: object(c_default="NULL") = 0
2142 /
2143
2144Return index of the start of the substring matched by group.
2145[clinic start generated code]*/
2146
2147static Py_ssize_t
2148_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2149/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002150{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002151 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002152
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002153 if (index < 0 || index >= self->groups) {
2154 PyErr_SetString(
2155 PyExc_IndexError,
2156 "no such group"
2157 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002158 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002159 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002160
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002161 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002162 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002163}
2164
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002165/*[clinic input]
2166_sre.SRE_Match.end -> Py_ssize_t
2167
2168 group: object(c_default="NULL") = 0
2169 /
2170
2171Return index of the end of the substring matched by group.
2172[clinic start generated code]*/
2173
2174static Py_ssize_t
2175_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2176/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002177{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002178 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002179
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002180 if (index < 0 || index >= self->groups) {
2181 PyErr_SetString(
2182 PyExc_IndexError,
2183 "no such group"
2184 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002185 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002186 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002187
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002188 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002189 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002190}
2191
2192LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002193_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002194{
2195 PyObject* pair;
2196 PyObject* item;
2197
2198 pair = PyTuple_New(2);
2199 if (!pair)
2200 return NULL;
2201
Christian Heimes217cfd12007-12-02 14:31:20 +00002202 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002203 if (!item)
2204 goto error;
2205 PyTuple_SET_ITEM(pair, 0, item);
2206
Christian Heimes217cfd12007-12-02 14:31:20 +00002207 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002208 if (!item)
2209 goto error;
2210 PyTuple_SET_ITEM(pair, 1, item);
2211
2212 return pair;
2213
2214 error:
2215 Py_DECREF(pair);
2216 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002217}
2218
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002219/*[clinic input]
2220_sre.SRE_Match.span
2221
2222 group: object(c_default="NULL") = 0
2223 /
2224
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002225For match object m, return the 2-tuple (m.start(group), m.end(group)).
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002226[clinic start generated code]*/
2227
2228static PyObject *
2229_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002230/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002231{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002232 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002233
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002234 if (index < 0 || index >= self->groups) {
2235 PyErr_SetString(
2236 PyExc_IndexError,
2237 "no such group"
2238 );
2239 return NULL;
2240 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002241
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002242 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002243 return _pair(self->mark[index*2], self->mark[index*2+1]);
2244}
2245
2246static PyObject*
2247match_regs(MatchObject* self)
2248{
2249 PyObject* regs;
2250 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002251 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002252
2253 regs = PyTuple_New(self->groups);
2254 if (!regs)
2255 return NULL;
2256
2257 for (index = 0; index < self->groups; index++) {
2258 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2259 if (!item) {
2260 Py_DECREF(regs);
2261 return NULL;
2262 }
2263 PyTuple_SET_ITEM(regs, index, item);
2264 }
2265
2266 Py_INCREF(regs);
2267 self->regs = regs;
2268
2269 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002270}
2271
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002272/*[clinic input]
2273_sre.SRE_Match.__copy__
2274
2275[clinic start generated code]*/
2276
static PyObject *
_sre_SRE_Match___copy___impl(MatchObject *self)
/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
{
    /* A copy of a match object is the object itself (new reference) */
    Py_INCREF(self);
    return (PyObject *)self;
}
2284
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002285/*[clinic input]
2286_sre.SRE_Match.__deepcopy__
2287
2288 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002289 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002290
2291[clinic start generated code]*/
2292
2293static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002294_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2295/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002296{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002297 Py_INCREF(self);
2298 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002299}
2300
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002301PyDoc_STRVAR(match_doc,
2302"The result of re.match() and re.search().\n\
2303Match objects always have a boolean value of True.");
2304
2305PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002306"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002307 Return subgroup(s) of the match by indices or names.\n\
2308 For 0 returns the entire match.");
2309
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002310static PyObject *
2311match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002312{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002313 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002314 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002315 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002316}
2317
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002318static PyObject *
2319match_lastgroup_get(MatchObject *self)
2320{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002321 if (self->pattern->indexgroup &&
2322 self->lastindex >= 0 &&
2323 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2324 {
2325 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2326 self->lastindex);
2327 Py_INCREF(result);
2328 return result;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002329 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002330 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002331}
2332
2333static PyObject *
2334match_regs_get(MatchObject *self)
2335{
2336 if (self->regs) {
2337 Py_INCREF(self->regs);
2338 return self->regs;
2339 } else
2340 return match_regs(self);
2341}
2342
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002343static PyObject *
2344match_repr(MatchObject *self)
2345{
2346 PyObject *result;
2347 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2348 if (group0 == NULL)
2349 return NULL;
2350 result = PyUnicode_FromFormat(
2351 "<%s object; span=(%d, %d), match=%.50R>",
2352 Py_TYPE(self)->tp_name,
2353 self->mark[0], self->mark[1], group0);
2354 Py_DECREF(group0);
2355 return result;
2356}
2357
2358
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002359static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002360pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002361{
2362 /* create match object (from state object) */
2363
2364 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002365 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002366 char* base;
2367 int n;
2368
2369 if (status > 0) {
2370
2371 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002372 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002373 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2374 2*(pattern->groups+1));
2375 if (!match)
2376 return NULL;
2377
2378 Py_INCREF(pattern);
2379 match->pattern = pattern;
2380
2381 Py_INCREF(state->string);
2382 match->string = state->string;
2383
2384 match->regs = NULL;
2385 match->groups = pattern->groups+1;
2386
2387 /* fill in group slices */
2388
2389 base = (char*) state->beginning;
2390 n = state->charsize;
2391
2392 match->mark[0] = ((char*) state->start - base) / n;
2393 match->mark[1] = ((char*) state->ptr - base) / n;
2394
2395 for (i = j = 0; i < pattern->groups; i++, j+=2)
2396 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2397 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2398 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2399 } else
2400 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2401
2402 match->pos = state->pos;
2403 match->endpos = state->endpos;
2404
2405 match->lastindex = state->lastindex;
2406
2407 return (PyObject*) match;
2408
2409 } else if (status == 0) {
2410
2411 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002412 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002413
2414 }
2415
2416 /* internal error */
2417 pattern_error(status);
2418 return NULL;
2419}
2420
2421
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002422/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002423/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002424
2425static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002426scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002427{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002428 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002429 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002430 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002431}
2432
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002433/*[clinic input]
2434_sre.SRE_Scanner.match
2435
2436[clinic start generated code]*/
2437
2438static PyObject *
2439_sre_SRE_Scanner_match_impl(ScannerObject *self)
2440/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002441{
2442 SRE_STATE* state = &self->state;
2443 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002444 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002445
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002446 if (state->start == NULL)
2447 Py_RETURN_NONE;
2448
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002449 state_reset(state);
2450
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002451 state->ptr = state->start;
2452
Serhiy Storchaka429b59e2014-05-14 21:48:17 +03002453 status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
Thomas Wouters89f507f2006-12-13 04:49:30 +00002454 if (PyErr_Occurred())
2455 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002456
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002457 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002458 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002459
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002460 if (status == 0)
2461 state->start = NULL;
2462 else if (state->ptr != state->start)
2463 state->start = state->ptr;
2464 else if (state->ptr != state->end)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002465 state->start = (void*) ((char*) state->ptr + state->charsize);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002466 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002467 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002468
2469 return match;
2470}
2471
2472
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002473/*[clinic input]
2474_sre.SRE_Scanner.search
2475
2476[clinic start generated code]*/
2477
2478static PyObject *
2479_sre_SRE_Scanner_search_impl(ScannerObject *self)
2480/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002481{
2482 SRE_STATE* state = &self->state;
2483 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002484 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002485
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002486 if (state->start == NULL)
2487 Py_RETURN_NONE;
2488
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002489 state_reset(state);
2490
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002491 state->ptr = state->start;
2492
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002493 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002494 if (PyErr_Occurred())
2495 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002496
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002497 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002498 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002499
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002500 if (status == 0)
2501 state->start = NULL;
2502 else if (state->ptr != state->start)
2503 state->start = state->ptr;
2504 else if (state->ptr != state->end)
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002505 state->start = (void*) ((char*) state->ptr + state->charsize);
2506 else
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002507 state->start = NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002508
2509 return match;
2510}
2511
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002512static PyObject *
2513pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002514{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002515 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002516
2517 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002518 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2519 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002520 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002521 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002522
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002523 /* create search state object */
2524 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2525 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002526 return NULL;
2527 }
2528
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002529 Py_INCREF(self);
2530 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002531
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002532 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002533}
2534
Victor Stinnerb44fb122016-11-21 16:35:08 +01002535static Py_hash_t
2536pattern_hash(PatternObject *self)
2537{
2538 Py_hash_t hash, hash2;
2539
2540 hash = PyObject_Hash(self->pattern);
2541 if (hash == -1) {
2542 return -1;
2543 }
2544
2545 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2546 hash ^= hash2;
2547
2548 hash ^= self->flags;
2549 hash ^= self->isbytes;
2550 hash ^= self->codesize;
2551
2552 if (hash == -1) {
2553 hash = -2;
2554 }
2555 return hash;
2556}
2557
2558static PyObject*
2559pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2560{
2561 PatternObject *left, *right;
2562 int cmp;
2563
2564 if (op != Py_EQ && op != Py_NE) {
2565 Py_RETURN_NOTIMPLEMENTED;
2566 }
2567
2568 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2569 Py_RETURN_NOTIMPLEMENTED;
2570 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002571
2572 if (lefto == righto) {
2573 /* a pattern is equal to itself */
2574 return PyBool_FromLong(op == Py_EQ);
2575 }
2576
Victor Stinnerb44fb122016-11-21 16:35:08 +01002577 left = (PatternObject *)lefto;
2578 right = (PatternObject *)righto;
2579
2580 cmp = (left->flags == right->flags
2581 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002582 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002583 if (cmp) {
2584 /* Compare the code and the pattern because the same pattern can
2585 produce different codes depending on the locale used to compile the
2586 pattern when the re.LOCALE flag is used. Don't compare groups,
2587 indexgroup nor groupindex: they are derivated from the pattern. */
2588 cmp = (memcmp(left->code, right->code,
2589 sizeof(left->code[0]) * left->codesize) == 0);
2590 }
2591 if (cmp) {
2592 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2593 Py_EQ);
2594 if (cmp < 0) {
2595 return NULL;
2596 }
2597 }
2598 if (op == Py_NE) {
2599 cmp = !cmp;
2600 }
2601 return PyBool_FromLong(cmp);
2602}
2603
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002604#include "clinic/_sre.c.h"
2605
2606static PyMethodDef pattern_methods[] = {
2607 _SRE_SRE_PATTERN_MATCH_METHODDEF
2608 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2609 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2610 _SRE_SRE_PATTERN_SUB_METHODDEF
2611 _SRE_SRE_PATTERN_SUBN_METHODDEF
2612 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2613 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2614 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2615 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2616 _SRE_SRE_PATTERN___COPY___METHODDEF
2617 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2618 {NULL, NULL}
2619};
2620
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002621static PyGetSetDef pattern_getset[] = {
2622 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2623 "A dictionary mapping group names to group numbers."},
2624 {NULL} /* Sentinel */
2625};
2626
2627#define PAT_OFF(x) offsetof(PatternObject, x)
2628static PyMemberDef pattern_members[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002629 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
2630 "The pattern string from which the RE object was compiled."},
2631 {"flags", T_INT, PAT_OFF(flags), READONLY,
2632 "The regex matching flags."},
2633 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
2634 "The number of capturing groups in the pattern."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002635 {NULL} /* Sentinel */
2636};
2637
2638static PyTypeObject Pattern_Type = {
2639 PyVarObject_HEAD_INIT(NULL, 0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002640 "re.Pattern",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002641 sizeof(PatternObject), sizeof(SRE_CODE),
2642 (destructor)pattern_dealloc, /* tp_dealloc */
2643 0, /* tp_print */
2644 0, /* tp_getattr */
2645 0, /* tp_setattr */
2646 0, /* tp_reserved */
2647 (reprfunc)pattern_repr, /* tp_repr */
2648 0, /* tp_as_number */
2649 0, /* tp_as_sequence */
2650 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002651 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002652 0, /* tp_call */
2653 0, /* tp_str */
2654 0, /* tp_getattro */
2655 0, /* tp_setattro */
2656 0, /* tp_as_buffer */
2657 Py_TPFLAGS_DEFAULT, /* tp_flags */
2658 pattern_doc, /* tp_doc */
2659 0, /* tp_traverse */
2660 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002661 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002662 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2663 0, /* tp_iter */
2664 0, /* tp_iternext */
2665 pattern_methods, /* tp_methods */
2666 pattern_members, /* tp_members */
2667 pattern_getset, /* tp_getset */
2668};
2669
Eric V. Smith605bdae2016-09-11 08:55:43 -04002670/* Match objects do not support length or assignment, but do support
2671 __getitem__. */
2672static PyMappingMethods match_as_mapping = {
2673 NULL,
2674 (binaryfunc)match_getitem,
2675 NULL
2676};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002677
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002678static PyMethodDef match_methods[] = {
2679 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2680 _SRE_SRE_MATCH_START_METHODDEF
2681 _SRE_SRE_MATCH_END_METHODDEF
2682 _SRE_SRE_MATCH_SPAN_METHODDEF
2683 _SRE_SRE_MATCH_GROUPS_METHODDEF
2684 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2685 _SRE_SRE_MATCH_EXPAND_METHODDEF
2686 _SRE_SRE_MATCH___COPY___METHODDEF
2687 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2688 {NULL, NULL}
2689};
2690
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002691static PyGetSetDef match_getset[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002692 {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2693 "The integer index of the last matched capturing group."},
2694 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2695 "The name of the last matched capturing group."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002696 {"regs", (getter)match_regs_get, (setter)NULL},
2697 {NULL}
2698};
2699
2700#define MATCH_OFF(x) offsetof(MatchObject, x)
2701static PyMemberDef match_members[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002702 {"string", T_OBJECT, MATCH_OFF(string), READONLY,
2703 "The string passed to match() or search()."},
2704 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
2705 "The regular expression object."},
2706 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
2707 "The index into the string at which the RE engine started looking for a match."},
2708 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
2709 "The index into the string beyond which the RE engine will not go."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002710 {NULL}
2711};
2712
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2715
2716static PyTypeObject Match_Type = {
2717 PyVarObject_HEAD_INIT(NULL,0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002718 "re.Match",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002719 sizeof(MatchObject), sizeof(Py_ssize_t),
2720 (destructor)match_dealloc, /* tp_dealloc */
2721 0, /* tp_print */
2722 0, /* tp_getattr */
2723 0, /* tp_setattr */
2724 0, /* tp_reserved */
2725 (reprfunc)match_repr, /* tp_repr */
2726 0, /* tp_as_number */
2727 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002728 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002729 0, /* tp_hash */
2730 0, /* tp_call */
2731 0, /* tp_str */
2732 0, /* tp_getattro */
2733 0, /* tp_setattro */
2734 0, /* tp_as_buffer */
2735 Py_TPFLAGS_DEFAULT, /* tp_flags */
2736 match_doc, /* tp_doc */
2737 0, /* tp_traverse */
2738 0, /* tp_clear */
2739 0, /* tp_richcompare */
2740 0, /* tp_weaklistoffset */
2741 0, /* tp_iter */
2742 0, /* tp_iternext */
2743 match_methods, /* tp_methods */
2744 match_members, /* tp_members */
2745 match_getset, /* tp_getset */
2746};
2747
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002748static PyMethodDef scanner_methods[] = {
2749 _SRE_SRE_SCANNER_MATCH_METHODDEF
2750 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2751 {NULL, NULL}
2752};
2753
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002754#define SCAN_OFF(x) offsetof(ScannerObject, x)
2755static PyMemberDef scanner_members[] = {
2756 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2757 {NULL} /* Sentinel */
2758};
2759
2760static PyTypeObject Scanner_Type = {
2761 PyVarObject_HEAD_INIT(NULL, 0)
2762 "_" SRE_MODULE ".SRE_Scanner",
2763 sizeof(ScannerObject), 0,
2764 (destructor)scanner_dealloc,/* tp_dealloc */
2765 0, /* tp_print */
2766 0, /* tp_getattr */
2767 0, /* tp_setattr */
2768 0, /* tp_reserved */
2769 0, /* tp_repr */
2770 0, /* tp_as_number */
2771 0, /* tp_as_sequence */
2772 0, /* tp_as_mapping */
2773 0, /* tp_hash */
2774 0, /* tp_call */
2775 0, /* tp_str */
2776 0, /* tp_getattro */
2777 0, /* tp_setattro */
2778 0, /* tp_as_buffer */
2779 Py_TPFLAGS_DEFAULT, /* tp_flags */
2780 0, /* tp_doc */
2781 0, /* tp_traverse */
2782 0, /* tp_clear */
2783 0, /* tp_richcompare */
2784 0, /* tp_weaklistoffset */
2785 0, /* tp_iter */
2786 0, /* tp_iternext */
2787 scanner_methods, /* tp_methods */
2788 scanner_members, /* tp_members */
2789 0, /* tp_getset */
2790};
2791
Guido van Rossumb700df92000-03-31 14:59:30 +00002792static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002793 _SRE_COMPILE_METHODDEF
2794 _SRE_GETCODESIZE_METHODDEF
Serhiy Storchaka6d336a02017-05-09 23:37:14 +03002795 _SRE_ASCII_ISCASED_METHODDEF
2796 _SRE_UNICODE_ISCASED_METHODDEF
Serhiy Storchaka7186cc22017-05-05 10:42:46 +03002797 _SRE_ASCII_TOLOWER_METHODDEF
2798 _SRE_UNICODE_TOLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002799 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002800};
2801
Martin v. Löwis1a214512008-06-11 05:26:20 +00002802static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002803 PyModuleDef_HEAD_INIT,
2804 "_" SRE_MODULE,
2805 NULL,
2806 -1,
2807 _functions,
2808 NULL,
2809 NULL,
2810 NULL,
2811 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002812};
2813
2814PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002815{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002816 PyObject* m;
2817 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002818 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002819
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002820 /* Patch object types */
2821 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2822 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002823 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002824
Martin v. Löwis1a214512008-06-11 05:26:20 +00002825 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002826 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002827 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002828 d = PyModule_GetDict(m);
2829
Christian Heimes217cfd12007-12-02 14:31:20 +00002830 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002831 if (x) {
2832 PyDict_SetItemString(d, "MAGIC", x);
2833 Py_DECREF(x);
2834 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002835
Christian Heimes217cfd12007-12-02 14:31:20 +00002836 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002837 if (x) {
2838 PyDict_SetItemString(d, "CODESIZE", x);
2839 Py_DECREF(x);
2840 }
2841
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002842 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2843 if (x) {
2844 PyDict_SetItemString(d, "MAXREPEAT", x);
2845 Py_DECREF(x);
2846 }
2847
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002848 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2849 if (x) {
2850 PyDict_SetItemString(d, "MAXGROUPS", x);
2851 Py_DECREF(x);
2852 }
2853
Neal Norwitzfe537132007-08-26 03:55:15 +00002854 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002855 if (x) {
2856 PyDict_SetItemString(d, "copyright", x);
2857 Py_DECREF(x);
2858 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002859 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002860}
2861
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002862/* vim:ts=4:sw=4:et
2863*/