/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl   created (based on existing template matcher code)
 * 2000-03-06 fl   first alpha, sort of
 * 2000-08-01 fl   fixes for 1.6b1
 * 2000-08-07 fl   use PyOS_CheckStack() if available
 * 2000-09-20 fl   added expand method
 * 2001-03-20 fl   lots of fixes for 2.1b2
 * 2001-04-15 fl   export copyright as Python attribute, not global
 * 2001-04-28 fl   added __copy__ methods (work in progress)
 * 2001-05-14 fl   fixes for 1.5.2 compatibility
 * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl   added sub/subn primitive
 * 2001-10-24 fl   added finditer primitive (for 2.2 only)
 * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl   fixed empty sub/subn return type
 * 2003-04-18 mvl  fully support 4-byte codes
 * 2003-10-17 gn   implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license.  For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020038static const char copyright[] =
Fredrik Lundh09705f02002-11-22 12:46:35 +000039 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
/* number of bits in one SRE_CODE unit (assumes 8-bit bytes) */
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* name of this module, minus the leading underscore; may be
   overridden by defining SRE_MODULE before this point */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

#define SRE_PY_MODULE "re"

/* defining this one enables tracing */
#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
/* LOCAL(type) declares a file-local inline function returning `type` */
#ifdef _MSC_VER
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
72
/* error codes (all negative) */
#define SRE_ERROR_ILLEGAL -1          /* illegal opcode */
#define SRE_ERROR_STATE -2            /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT -3  /* runaway recursion */
#define SRE_ERROR_MEMORY -9           /* out of memory */
#define SRE_ERROR_INTERRUPTED -10     /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000079
/* TRACE((fmt, ...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise; note the double parentheses */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000086/* -------------------------------------------------------------------- */
87/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000088
/* ASCII-only character predicates: anything >= 128 never matches */
#define SRE_IS_DIGIT(ch) ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch) ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_IS_ALNUM(ch) ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch) ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +000099
/* Lowercase `ch` using ASCII rules; code points >= 128 pass through. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOLOWER(ch);
}
104
/* Uppercase `ch` using ASCII rules; code points >= 128 pass through. */
static unsigned int sre_upper_ascii(unsigned int ch)
{
    if (ch >= 128) {
        return ch;
    }
    return Py_TOUPPER(ch);
}
109
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c; writing the range test
 * this way avoids warnings when c's type only supports numbers < N+1 */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum(ch) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM(ch) || (ch) == '_')
115
/* Lowercase `ch` with the C library's tolower() (current locale);
   values >= 256 are returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)tolower(ch);
}
120
/* Uppercase `ch` with the C library's toupper() (current locale);
   values >= 256 are returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256) {
        return ch;
    }
    return (unsigned int)toupper(ch);
}
125
/* unicode-specific character predicates (thin wrappers around the
   Py_UNICODE_* classification macros) */

#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
/* a "word" character is alphanumeric or an underscore */
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000133
/* Lowercase `ch` via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);
    return lowered;
}
138
/* Uppercase `ch` via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    unsigned int uppered = (unsigned int) Py_UNICODE_TOUPPER(ch);
    return uppered;
}
143
Guido van Rossumb700df92000-03-31 14:59:30 +0000144LOCAL(int)
145sre_category(SRE_CODE category, unsigned int ch)
146{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000147 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000148
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000149 case SRE_CATEGORY_DIGIT:
150 return SRE_IS_DIGIT(ch);
151 case SRE_CATEGORY_NOT_DIGIT:
152 return !SRE_IS_DIGIT(ch);
153 case SRE_CATEGORY_SPACE:
154 return SRE_IS_SPACE(ch);
155 case SRE_CATEGORY_NOT_SPACE:
156 return !SRE_IS_SPACE(ch);
157 case SRE_CATEGORY_WORD:
158 return SRE_IS_WORD(ch);
159 case SRE_CATEGORY_NOT_WORD:
160 return !SRE_IS_WORD(ch);
161 case SRE_CATEGORY_LINEBREAK:
162 return SRE_IS_LINEBREAK(ch);
163 case SRE_CATEGORY_NOT_LINEBREAK:
164 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000165
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000166 case SRE_CATEGORY_LOC_WORD:
167 return SRE_LOC_IS_WORD(ch);
168 case SRE_CATEGORY_LOC_NOT_WORD:
169 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000170
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000171 case SRE_CATEGORY_UNI_DIGIT:
172 return SRE_UNI_IS_DIGIT(ch);
173 case SRE_CATEGORY_UNI_NOT_DIGIT:
174 return !SRE_UNI_IS_DIGIT(ch);
175 case SRE_CATEGORY_UNI_SPACE:
176 return SRE_UNI_IS_SPACE(ch);
177 case SRE_CATEGORY_UNI_NOT_SPACE:
178 return !SRE_UNI_IS_SPACE(ch);
179 case SRE_CATEGORY_UNI_WORD:
180 return SRE_UNI_IS_WORD(ch);
181 case SRE_CATEGORY_UNI_NOT_WORD:
182 return !SRE_UNI_IS_WORD(ch);
183 case SRE_CATEGORY_UNI_LINEBREAK:
184 return SRE_UNI_IS_LINEBREAK(ch);
185 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
186 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000187 }
188 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000189}
190
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300191LOCAL(int)
192char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
193{
194 return ch == pattern
195 || (SRE_CODE) sre_lower_locale(ch) == pattern
196 || (SRE_CODE) sre_upper_locale(ch) == pattern;
197}
198
199
Guido van Rossumb700df92000-03-31 14:59:30 +0000200/* helpers */
201
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000202static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000203data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000204{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000205 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000206 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000207 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000208 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000210}
211
212static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000213data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000214{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000215 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000216 minsize = state->data_stack_base+size;
217 cursize = state->data_stack_size;
218 if (cursize < minsize) {
219 void* stack;
220 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300221 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000222 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000223 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000224 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000225 return SRE_ERROR_MEMORY;
226 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000227 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000228 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000229 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000230 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000231}
232
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000233/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000234
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300235#define SRE_CHAR Py_UCS1
236#define SIZEOF_SRE_CHAR 1
237#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300238#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000239
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300240/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000241
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300242#define SRE_CHAR Py_UCS2
243#define SIZEOF_SRE_CHAR 2
244#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300245#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000246
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300247/* generate 32-bit unicode version */
248
249#define SRE_CHAR Py_UCS4
250#define SIZEOF_SRE_CHAR 4
251#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300252#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000253
254/* -------------------------------------------------------------------- */
255/* factories and destructors */
256
257/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100258static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300259static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000260
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300261
262/*[clinic input]
263module _sre
264class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
265class _sre.SRE_Match "MatchObject *" "&Match_Type"
266class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
267[clinic start generated code]*/
268/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
269
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700270static PyTypeObject Pattern_Type;
271static PyTypeObject Match_Type;
272static PyTypeObject Scanner_Type;
273
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300274/*[clinic input]
275_sre.getcodesize -> int
276[clinic start generated code]*/
277
278static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300279_sre_getcodesize_impl(PyObject *module)
280/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000281{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300282 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000283}
284
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300285/*[clinic input]
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300286_sre.ascii_iscased -> bool
287
288 character: int
289 /
290
291[clinic start generated code]*/
292
293static int
294_sre_ascii_iscased_impl(PyObject *module, int character)
295/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
296{
297 unsigned int ch = (unsigned int)character;
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300298 return ch != sre_lower_ascii(ch) || ch != sre_upper_ascii(ch);
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300299}
300
301/*[clinic input]
302_sre.unicode_iscased -> bool
303
304 character: int
305 /
306
307[clinic start generated code]*/
308
309static int
310_sre_unicode_iscased_impl(PyObject *module, int character)
311/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
312{
313 unsigned int ch = (unsigned int)character;
314 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
315}
316
317/*[clinic input]
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300318_sre.ascii_tolower -> int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300319
320 character: int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300321 /
322
323[clinic start generated code]*/
324
325static int
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300326_sre_ascii_tolower_impl(PyObject *module, int character)
327/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000328{
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300329 return sre_lower_ascii(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000330}
331
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300332/*[clinic input]
333_sre.unicode_tolower -> int
334
335 character: int
336 /
337
338[clinic start generated code]*/
339
340static int
341_sre_unicode_tolower_impl(PyObject *module, int character)
342/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
343{
344 return sre_lower_unicode(character);
345}
346
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000347LOCAL(void)
348state_reset(SRE_STATE* state)
349{
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000350 /* FIXME: dynamic! */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000351 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000352
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000353 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000354 state->lastindex = -1;
355
356 state->repeat = NULL;
357
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000358 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000359}
360
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000361static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200362getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300363 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600364 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000365{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000366 /* given a python object, return a data pointer, a length (in
367 characters), and a character size. return NULL if the object
368 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000369
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000370 /* Unicode objects do not support the buffer API. So, get the data
371 directly instead. */
372 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200373 if (PyUnicode_READY(string) == -1)
374 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200376 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300377 *p_isbytes = 0;
378 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000379 }
380
Victor Stinner0058b862011-09-29 03:27:47 +0200381 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300382 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200383 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300384 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000385 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000386
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300387 *p_length = view->len;
388 *p_charsize = 1;
389 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000390
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300391 if (view->buf == NULL) {
392 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
393 PyBuffer_Release(view);
394 view->buf = NULL;
395 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000396 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300397 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000398}
399
400LOCAL(PyObject*)
401state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000402 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000403{
404 /* prepare state object */
405
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000406 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300407 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000408 void* ptr;
409
410 memset(state, 0, sizeof(SRE_STATE));
411
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300412 state->mark = PyMem_New(void *, pattern->groups * 2);
413 if (!state->mark) {
414 PyErr_NoMemory();
415 goto err;
416 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000417 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000418 state->lastindex = -1;
419
Benjamin Petersone48944b2012-03-07 14:50:25 -0600420 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300421 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000422 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600423 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000424
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300425 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600426 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200427 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600428 goto err;
429 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300430 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600431 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200432 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600433 goto err;
434 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000435
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000436 /* adjust boundaries */
437 if (start < 0)
438 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000439 else if (start > length)
440 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000441
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000442 if (end < 0)
443 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000444 else if (end > length)
445 end = length;
446
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300447 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000448 state->charsize = charsize;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200449 state->match_all = 0;
450 state->must_advance = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000451
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000452 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000453
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000454 state->start = (void*) ((char*) ptr + start * state->charsize);
455 state->end = (void*) ((char*) ptr + end * state->charsize);
456
457 Py_INCREF(string);
458 state->string = string;
459 state->pos = start;
460 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000461
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000462 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600463 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300464 PyMem_Del(state->mark);
465 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600466 if (state->buffer.buf)
467 PyBuffer_Release(&state->buffer);
468 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000469}
470
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000471LOCAL(void)
472state_fini(SRE_STATE* state)
473{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600474 if (state->buffer.buf)
475 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000476 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000477 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300478 PyMem_Del(state->mark);
479 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000480}
481
/* calculate offset from start of string: the byte distance between
   `member` and state->beginning, divided by the character size */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
485
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000486LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300487getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300488 PyObject* string, Py_ssize_t start, Py_ssize_t end)
489{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300490 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300491 if (PyBytes_CheckExact(string) &&
492 start == 0 && end == PyBytes_GET_SIZE(string)) {
493 Py_INCREF(string);
494 return string;
495 }
496 return PyBytes_FromStringAndSize(
497 (const char *)ptr + start, end - start);
498 }
499 else {
500 return PyUnicode_Substring(string, start, end);
501 }
502}
503
504LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000505state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000506{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000507 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000508
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000509 index = (index - 1) * 2;
510
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000511 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000512 if (empty)
513 /* want empty string */
514 i = j = 0;
515 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200516 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000517 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000518 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000519 i = STATE_OFFSET(state, state->mark[index]);
520 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000521 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000522
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300523 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000524}
525
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000526static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100527pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000528{
529 switch (status) {
530 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400531 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000532 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400533 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000534 "maximum recursion limit exceeded"
535 );
536 break;
537 case SRE_ERROR_MEMORY:
538 PyErr_NoMemory();
539 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000540 case SRE_ERROR_INTERRUPTED:
541 /* An exception has already been raised, so let it fly */
542 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000543 default:
544 /* other error codes indicate compiler/engine bugs */
545 PyErr_SetString(
546 PyExc_RuntimeError,
547 "internal error in regular expression engine"
548 );
549 }
550}
551
Guido van Rossumb700df92000-03-31 14:59:30 +0000552static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000553pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000554{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000555 if (self->weakreflist != NULL)
556 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000557 Py_XDECREF(self->pattern);
558 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000559 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000560 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000561}
562
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300563LOCAL(Py_ssize_t)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200564sre_match(SRE_STATE* state, SRE_CODE* pattern)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300565{
566 if (state->charsize == 1)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200567 return sre_ucs1_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300568 if (state->charsize == 2)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200569 return sre_ucs2_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300570 assert(state->charsize == 4);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200571 return sre_ucs4_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300572}
573
574LOCAL(Py_ssize_t)
575sre_search(SRE_STATE* state, SRE_CODE* pattern)
576{
577 if (state->charsize == 1)
578 return sre_ucs1_search(state, pattern);
579 if (state->charsize == 2)
580 return sre_ucs2_search(state, pattern);
581 assert(state->charsize == 4);
582 return sre_ucs4_search(state, pattern);
583}
584
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300585/*[clinic input]
586_sre.SRE_Pattern.match
587
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200588 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300589 pos: Py_ssize_t = 0
590 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300591
592Matches zero or more characters at the beginning of the string.
593[clinic start generated code]*/
594
Larry Hastings16c51912014-01-07 11:53:01 -0800595static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300596_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200597 Py_ssize_t pos, Py_ssize_t endpos)
598/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800599{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000600 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100601 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300602 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000603
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300604 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000605 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000606
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000607 state.ptr = state.start;
608
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000609 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
610
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200611 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000612
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000613 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300614 if (PyErr_Occurred()) {
615 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000616 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300617 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000618
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300619 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000620 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300621 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000622}
623
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300624/*[clinic input]
625_sre.SRE_Pattern.fullmatch
626
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200627 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300628 pos: Py_ssize_t = 0
629 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300630
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300631Matches against all of the string.
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300632[clinic start generated code]*/
633
634static PyObject *
635_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200636 Py_ssize_t pos, Py_ssize_t endpos)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300637/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200638{
639 SRE_STATE state;
640 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300641 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200642
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300643 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200644 return NULL;
645
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200646 state.ptr = state.start;
647
648 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
649
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200650 state.match_all = 1;
651 status = sre_match(&state, PatternObject_GetCode(self));
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200652
653 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300654 if (PyErr_Occurred()) {
655 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200656 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300657 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200658
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300659 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200660 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300661 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200662}
663
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300664/*[clinic input]
665_sre.SRE_Pattern.search
666
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200667 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300668 pos: Py_ssize_t = 0
669 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300670
671Scan through string looking for a match, and return a corresponding match object instance.
672
673Return None if no position in the string matches.
674[clinic start generated code]*/
675
676static PyObject *
677_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200678 Py_ssize_t pos, Py_ssize_t endpos)
679/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000680{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000681 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100682 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300683 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300685 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000686 return NULL;
687
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000688 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
689
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300690 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000691
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000692 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
693
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300694 if (PyErr_Occurred()) {
695 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000696 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300697 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000698
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300699 match = pattern_new_match(self, &state, status);
700 state_fini(&state);
701 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000702}
703
704static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200705call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000706{
707 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000708 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000709 PyObject* func;
710 PyObject* result;
711
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000712 if (!args)
713 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000714 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000715 if (!name)
716 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000717 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000718 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000719 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000720 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000721 func = PyObject_GetAttrString(mod, function);
722 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000723 if (!func)
724 return NULL;
725 result = PyObject_CallObject(func, args);
726 Py_DECREF(func);
727 Py_DECREF(args);
728 return result;
729}
730
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300731/*[clinic input]
732_sre.SRE_Pattern.findall
733
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200734 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300735 pos: Py_ssize_t = 0
736 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300737
738Return a list of all non-overlapping matches of pattern in string.
739[clinic start generated code]*/
740
741static PyObject *
742_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200743 Py_ssize_t pos, Py_ssize_t endpos)
744/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000745{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000746 SRE_STATE state;
747 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100748 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000749 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000750
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300751 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000752 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000753
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000754 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000755 if (!list) {
756 state_fini(&state);
757 return NULL;
758 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000759
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000760 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000761
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000762 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000763
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000764 state_reset(&state);
765
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000766 state.ptr = state.start;
767
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300768 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300769 if (PyErr_Occurred())
770 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000771
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000772 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000773 if (status == 0)
774 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000775 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000776 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000777 }
Tim Peters3d563502006-01-21 02:47:53 +0000778
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000779 /* don't bother to build a match object */
780 switch (self->groups) {
781 case 0:
782 b = STATE_OFFSET(&state, state.start);
783 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300784 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300785 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000786 if (!item)
787 goto error;
788 break;
789 case 1:
790 item = state_getslice(&state, 1, string, 1);
791 if (!item)
792 goto error;
793 break;
794 default:
795 item = PyTuple_New(self->groups);
796 if (!item)
797 goto error;
798 for (i = 0; i < self->groups; i++) {
799 PyObject* o = state_getslice(&state, i+1, string, 1);
800 if (!o) {
801 Py_DECREF(item);
802 goto error;
803 }
804 PyTuple_SET_ITEM(item, i, o);
805 }
806 break;
807 }
808
809 status = PyList_Append(list, item);
810 Py_DECREF(item);
811 if (status < 0)
812 goto error;
813
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200814 state.must_advance = (state.ptr == state.start);
815 state.start = state.ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000817
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000818 state_fini(&state);
819 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000820
821error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000822 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000823 state_fini(&state);
824 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000825
Guido van Rossumb700df92000-03-31 14:59:30 +0000826}
827
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300828/*[clinic input]
829_sre.SRE_Pattern.finditer
830
831 string: object
832 pos: Py_ssize_t = 0
833 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
834
835Return an iterator over all non-overlapping matches for the RE pattern in string.
836
837For each match, the iterator returns a match object.
838[clinic start generated code]*/
839
840static PyObject *
841_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
842 Py_ssize_t pos, Py_ssize_t endpos)
843/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000844{
845 PyObject* scanner;
846 PyObject* search;
847 PyObject* iterator;
848
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300849 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000850 if (!scanner)
851 return NULL;
852
853 search = PyObject_GetAttrString(scanner, "search");
854 Py_DECREF(scanner);
855 if (!search)
856 return NULL;
857
858 iterator = PyCallIter_New(search, Py_None);
859 Py_DECREF(search);
860
861 return iterator;
862}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000863
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300864/*[clinic input]
865_sre.SRE_Pattern.scanner
866
867 string: object
868 pos: Py_ssize_t = 0
869 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
870
871[clinic start generated code]*/
872
873static PyObject *
874_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
875 Py_ssize_t pos, Py_ssize_t endpos)
876/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
877{
878 return pattern_scanner(self, string, pos, endpos);
879}
880
881/*[clinic input]
882_sre.SRE_Pattern.split
883
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200884 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300885 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300886
887Split string by the occurrences of pattern.
888[clinic start generated code]*/
889
890static PyObject *
891_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200892 Py_ssize_t maxsplit)
893/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000894{
895 SRE_STATE state;
896 PyObject* list;
897 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100898 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000899 Py_ssize_t n;
900 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000901 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000902
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200903 assert(self->codesize != 0);
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200904
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300905 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000906 return NULL;
907
908 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000909 if (!list) {
910 state_fini(&state);
911 return NULL;
912 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000913
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000914 n = 0;
915 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000916
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000917 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000918
919 state_reset(&state);
920
921 state.ptr = state.start;
922
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300923 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300924 if (PyErr_Occurred())
925 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000926
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000927 if (status <= 0) {
928 if (status == 0)
929 break;
930 pattern_error(status);
931 goto error;
932 }
Tim Peters3d563502006-01-21 02:47:53 +0000933
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000934 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300935 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000936 string, STATE_OFFSET(&state, last),
937 STATE_OFFSET(&state, state.start)
938 );
939 if (!item)
940 goto error;
941 status = PyList_Append(list, item);
942 Py_DECREF(item);
943 if (status < 0)
944 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000945
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000946 /* add groups (if any) */
947 for (i = 0; i < self->groups; i++) {
948 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000949 if (!item)
950 goto error;
951 status = PyList_Append(list, item);
952 Py_DECREF(item);
953 if (status < 0)
954 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000955 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000956
957 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +0200958 state.must_advance = (state.ptr == state.start);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000959 last = state.start = state.ptr;
960
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000961 }
962
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000963 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300964 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000965 string, STATE_OFFSET(&state, last), state.endpos
966 );
967 if (!item)
968 goto error;
969 status = PyList_Append(list, item);
970 Py_DECREF(item);
971 if (status < 0)
972 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000973
974 state_fini(&state);
975 return list;
976
977error:
978 Py_DECREF(list);
979 state_fini(&state);
980 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000981
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000982}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000983
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000984static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000985pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000986 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000987{
988 SRE_STATE state;
989 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300990 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000991 PyObject* item;
992 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000993 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000994 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100995 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000996 Py_ssize_t n;
997 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300998 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000999 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001000 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001001
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001002 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001003 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001004 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001005 Py_INCREF(filter);
1006 filter_is_callable = 1;
1007 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001008 /* if not callable, check if it's a literal string */
1009 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001010 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001011 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001013 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001014 if (charsize == 1)
1015 literal = memchr(ptr, '\\', n) == NULL;
1016 else
1017 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001018 } else {
1019 PyErr_Clear();
1020 literal = 0;
1021 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001022 if (view.buf)
1023 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001024 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001025 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001026 Py_INCREF(filter);
1027 filter_is_callable = 0;
1028 } else {
1029 /* not a literal; hand it over to the template compiler */
1030 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001031 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001032 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001033 );
1034 if (!filter)
1035 return NULL;
1036 filter_is_callable = PyCallable_Check(filter);
1037 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001038 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001039
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001040 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001041 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001042 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001043 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001044
1045 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001046 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001047 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001048 state_fini(&state);
1049 return NULL;
1050 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001051
1052 n = i = 0;
1053
1054 while (!count || n < count) {
1055
1056 state_reset(&state);
1057
1058 state.ptr = state.start;
1059
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001060 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001061 if (PyErr_Occurred())
1062 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001063
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001064 if (status <= 0) {
1065 if (status == 0)
1066 break;
1067 pattern_error(status);
1068 goto error;
1069 }
Tim Peters3d563502006-01-21 02:47:53 +00001070
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001071 b = STATE_OFFSET(&state, state.start);
1072 e = STATE_OFFSET(&state, state.ptr);
1073
1074 if (i < b) {
1075 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001076 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001077 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001078 if (!item)
1079 goto error;
1080 status = PyList_Append(list, item);
1081 Py_DECREF(item);
1082 if (status < 0)
1083 goto error;
1084
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001085 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001086
1087 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001088 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001089 match = pattern_new_match(self, &state, 1);
1090 if (!match)
1091 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001092 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001093 Py_DECREF(match);
1094 if (!item)
1095 goto error;
1096 } else {
1097 /* filter is literal string */
1098 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001099 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001100 }
1101
1102 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001103 if (item != Py_None) {
1104 status = PyList_Append(list, item);
1105 Py_DECREF(item);
1106 if (status < 0)
1107 goto error;
1108 }
Tim Peters3d563502006-01-21 02:47:53 +00001109
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001110 i = e;
1111 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +02001112 state.must_advance = (state.ptr == state.start);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001113 state.start = state.ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001114 }
1115
1116 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001117 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001118 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001119 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001120 if (!item)
1121 goto error;
1122 status = PyList_Append(list, item);
1123 Py_DECREF(item);
1124 if (status < 0)
1125 goto error;
1126 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001127
1128 state_fini(&state);
1129
Guido van Rossum4e173842001-12-07 04:25:10 +00001130 Py_DECREF(filter);
1131
Fredrik Lundhdac58492001-10-21 21:48:30 +00001132 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001133 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001134 if (!joiner) {
1135 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001136 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001137 }
1138 if (PyList_GET_SIZE(list) == 0) {
1139 Py_DECREF(list);
1140 item = joiner;
1141 }
1142 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001143 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001144 item = _PyBytes_Join(joiner, list);
1145 else
1146 item = PyUnicode_Join(joiner, list);
1147 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001148 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001149 if (!item)
1150 return NULL;
1151 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001152
1153 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001154 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001155
1156 return item;
1157
1158error:
1159 Py_DECREF(list);
1160 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001161 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001162 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001163
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001164}
1165
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001166/*[clinic input]
1167_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001168
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001169 repl: object
1170 string: object
1171 count: Py_ssize_t = 0
1172
1173Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1174[clinic start generated code]*/
1175
1176static PyObject *
1177_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1178 PyObject *string, Py_ssize_t count)
1179/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1180{
1181 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001182}
1183
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001184/*[clinic input]
1185_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001186
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001187 repl: object
1188 string: object
1189 count: Py_ssize_t = 0
1190
1191Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1192[clinic start generated code]*/
1193
1194static PyObject *
1195_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1196 PyObject *string, Py_ssize_t count)
1197/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1198{
1199 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001200}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001201
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001202/*[clinic input]
1203_sre.SRE_Pattern.__copy__
1204
1205[clinic start generated code]*/
1206
1207static PyObject *
1208_sre_SRE_Pattern___copy___impl(PatternObject *self)
1209/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001210{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001211 Py_INCREF(self);
1212 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001213}
1214
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001215/*[clinic input]
1216_sre.SRE_Pattern.__deepcopy__
1217
1218 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001219 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001220
1221[clinic start generated code]*/
1222
1223static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001224_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1225/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001226{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001227 Py_INCREF(self);
1228 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001229}
1230
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001231static PyObject *
1232pattern_repr(PatternObject *obj)
1233{
1234 static const struct {
1235 const char *name;
1236 int value;
1237 } flag_names[] = {
1238 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1239 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1240 {"re.LOCALE", SRE_FLAG_LOCALE},
1241 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1242 {"re.DOTALL", SRE_FLAG_DOTALL},
1243 {"re.UNICODE", SRE_FLAG_UNICODE},
1244 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1245 {"re.DEBUG", SRE_FLAG_DEBUG},
1246 {"re.ASCII", SRE_FLAG_ASCII},
1247 };
1248 PyObject *result = NULL;
1249 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001250 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001251 int flags = obj->flags;
1252
1253 /* Omit re.UNICODE for valid string patterns. */
1254 if (obj->isbytes == 0 &&
1255 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1256 SRE_FLAG_UNICODE)
1257 flags &= ~SRE_FLAG_UNICODE;
1258
1259 flag_items = PyList_New(0);
1260 if (!flag_items)
1261 return NULL;
1262
1263 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1264 if (flags & flag_names[i].value) {
1265 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1266 if (!item)
1267 goto done;
1268
1269 if (PyList_Append(flag_items, item) < 0) {
1270 Py_DECREF(item);
1271 goto done;
1272 }
1273 Py_DECREF(item);
1274 flags &= ~flag_names[i].value;
1275 }
1276 }
1277 if (flags) {
1278 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1279 if (!item)
1280 goto done;
1281
1282 if (PyList_Append(flag_items, item) < 0) {
1283 Py_DECREF(item);
1284 goto done;
1285 }
1286 Py_DECREF(item);
1287 }
1288
1289 if (PyList_Size(flag_items) > 0) {
1290 PyObject *flags_result;
1291 PyObject *sep = PyUnicode_FromString("|");
1292 if (!sep)
1293 goto done;
1294 flags_result = PyUnicode_Join(sep, flag_items);
1295 Py_DECREF(sep);
1296 if (!flags_result)
1297 goto done;
1298 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1299 obj->pattern, flags_result);
1300 Py_DECREF(flags_result);
1301 }
1302 else {
1303 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1304 }
1305
1306done:
1307 Py_DECREF(flag_items);
1308 return result;
1309}
1310
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03001311PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001312
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001313/* PatternObject's 'groupindex' method. */
1314static PyObject *
1315pattern_groupindex(PatternObject *self)
1316{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001317 if (self->groupindex == NULL)
1318 return PyDict_New();
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001319 return PyDictProxy_New(self->groupindex);
1320}
1321
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001322static int _validate(PatternObject *self); /* Forward */
1323
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001324/*[clinic input]
1325_sre.compile
1326
1327 pattern: object
1328 flags: int
1329 code: object(subclass_of='&PyList_Type')
1330 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001331 groupindex: object(subclass_of='&PyDict_Type')
1332 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001333
1334[clinic start generated code]*/
1335
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001336static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001337_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001338 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1339 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001340/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001341{
1342 /* "compile" pattern descriptor to pattern object */
1343
1344 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001345 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001346
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001347 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001348 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001349 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1350 if (!self)
1351 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001352 self->weakreflist = NULL;
1353 self->pattern = NULL;
1354 self->groupindex = NULL;
1355 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001356
1357 self->codesize = n;
1358
1359 for (i = 0; i < n; i++) {
1360 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001361 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001362 self->code[i] = (SRE_CODE) value;
1363 if ((unsigned long) self->code[i] != value) {
1364 PyErr_SetString(PyExc_OverflowError,
1365 "regular expression code size limit exceeded");
1366 break;
1367 }
1368 }
1369
1370 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001371 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001372 return NULL;
1373 }
1374
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001375 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001376 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001377 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378 else {
1379 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001380 int charsize;
1381 Py_buffer view;
1382 view.buf = NULL;
1383 if (!getstring(pattern, &p_length, &self->isbytes,
1384 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001385 Py_DECREF(self);
1386 return NULL;
1387 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001388 if (view.buf)
1389 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001390 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001391
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001392 Py_INCREF(pattern);
1393 self->pattern = pattern;
1394
1395 self->flags = flags;
1396
1397 self->groups = groups;
1398
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001399 if (PyDict_GET_SIZE(groupindex) > 0) {
1400 Py_INCREF(groupindex);
1401 self->groupindex = groupindex;
1402 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1403 Py_INCREF(indexgroup);
1404 self->indexgroup = indexgroup;
1405 }
1406 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001407
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001408 if (!_validate(self)) {
1409 Py_DECREF(self);
1410 return NULL;
1411 }
1412
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001413 return (PyObject*) self;
1414}
1415
Guido van Rossumb700df92000-03-31 14:59:30 +00001416/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001417/* Code validation */
1418
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

     J---------J-------T--------T
      \         \_____/        /
       \______________________/

   but this is not:

     J---------J-------T--------T
      \_________\_____/        /
                 \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1440
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)).  It expands to an
   empty statement unless VVERBOSE is defined. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the *enclosing function* return 0. */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the enclosing function:
   'code' (read cursor), 'end' (one past the last readable word) and one of
   'op', 'arg' or 'skip' (the destination).  Each one FAILs if the cursor is
   already at or beyond 'end', otherwise it stores *code in the destination
   and advances the cursor by one word. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but the word is a relative skip count: besides reading it,
   check that (skip - adj) does not exceed the number of words left between
   the skip word itself and 'end'.  The macro argument is parenthesized so
   that an expression can be passed safely.  If skip < adj the unsigned
   subtraction wraps around to a very large value, which is rejected by the
   same comparison. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001481
1482static int
1483_validate_charset(SRE_CODE *code, SRE_CODE *end)
1484{
1485 /* Some variables are manipulated by the macros above */
1486 SRE_CODE op;
1487 SRE_CODE arg;
1488 SRE_CODE offset;
1489 int i;
1490
1491 while (code < end) {
1492 GET_OP;
1493 switch (op) {
1494
1495 case SRE_OP_NEGATE:
1496 break;
1497
1498 case SRE_OP_LITERAL:
1499 GET_ARG;
1500 break;
1501
1502 case SRE_OP_RANGE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001503 case SRE_OP_RANGE_UNI_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001504 GET_ARG;
1505 GET_ARG;
1506 break;
1507
1508 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001509 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001510 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001511 FAIL;
1512 code += offset;
1513 break;
1514
1515 case SRE_OP_BIGCHARSET:
1516 GET_ARG; /* Number of blocks */
1517 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001518 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001519 FAIL;
1520 /* Make sure that each byte points to a valid block */
1521 for (i = 0; i < 256; i++) {
1522 if (((unsigned char *)code)[i] >= arg)
1523 FAIL;
1524 }
1525 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001526 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001527 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001528 FAIL;
1529 code += offset;
1530 break;
1531
1532 case SRE_OP_CATEGORY:
1533 GET_ARG;
1534 switch (arg) {
1535 case SRE_CATEGORY_DIGIT:
1536 case SRE_CATEGORY_NOT_DIGIT:
1537 case SRE_CATEGORY_SPACE:
1538 case SRE_CATEGORY_NOT_SPACE:
1539 case SRE_CATEGORY_WORD:
1540 case SRE_CATEGORY_NOT_WORD:
1541 case SRE_CATEGORY_LINEBREAK:
1542 case SRE_CATEGORY_NOT_LINEBREAK:
1543 case SRE_CATEGORY_LOC_WORD:
1544 case SRE_CATEGORY_LOC_NOT_WORD:
1545 case SRE_CATEGORY_UNI_DIGIT:
1546 case SRE_CATEGORY_UNI_NOT_DIGIT:
1547 case SRE_CATEGORY_UNI_SPACE:
1548 case SRE_CATEGORY_UNI_NOT_SPACE:
1549 case SRE_CATEGORY_UNI_WORD:
1550 case SRE_CATEGORY_UNI_NOT_WORD:
1551 case SRE_CATEGORY_UNI_LINEBREAK:
1552 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1553 break;
1554 default:
1555 FAIL;
1556 }
1557 break;
1558
1559 default:
1560 FAIL;
1561
1562 }
1563 }
1564
1565 return 1;
1566}
1567
1568static int
1569_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1570{
1571 /* Some variables are manipulated by the macros above */
1572 SRE_CODE op;
1573 SRE_CODE arg;
1574 SRE_CODE skip;
1575
1576 VTRACE(("code=%p, end=%p\n", code, end));
1577
1578 if (code > end)
1579 FAIL;
1580
1581 while (code < end) {
1582 GET_OP;
1583 switch (op) {
1584
1585 case SRE_OP_MARK:
1586 /* We don't check whether marks are properly nested; the
1587 sre_match() code is robust even if they don't, and the worst
1588 you can get is nonsensical match results. */
1589 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001590 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001591 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1592 FAIL;
1593 }
1594 break;
1595
1596 case SRE_OP_LITERAL:
1597 case SRE_OP_NOT_LITERAL:
1598 case SRE_OP_LITERAL_IGNORE:
1599 case SRE_OP_NOT_LITERAL_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001600 case SRE_OP_LITERAL_UNI_IGNORE:
1601 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001602 case SRE_OP_LITERAL_LOC_IGNORE:
1603 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001604 GET_ARG;
1605 /* The arg is just a character, nothing to check */
1606 break;
1607
1608 case SRE_OP_SUCCESS:
1609 case SRE_OP_FAILURE:
1610 /* Nothing to check; these normally end the matching process */
1611 break;
1612
1613 case SRE_OP_AT:
1614 GET_ARG;
1615 switch (arg) {
1616 case SRE_AT_BEGINNING:
1617 case SRE_AT_BEGINNING_STRING:
1618 case SRE_AT_BEGINNING_LINE:
1619 case SRE_AT_END:
1620 case SRE_AT_END_LINE:
1621 case SRE_AT_END_STRING:
1622 case SRE_AT_BOUNDARY:
1623 case SRE_AT_NON_BOUNDARY:
1624 case SRE_AT_LOC_BOUNDARY:
1625 case SRE_AT_LOC_NON_BOUNDARY:
1626 case SRE_AT_UNI_BOUNDARY:
1627 case SRE_AT_UNI_NON_BOUNDARY:
1628 break;
1629 default:
1630 FAIL;
1631 }
1632 break;
1633
1634 case SRE_OP_ANY:
1635 case SRE_OP_ANY_ALL:
1636 /* These have no operands */
1637 break;
1638
1639 case SRE_OP_IN:
1640 case SRE_OP_IN_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001641 case SRE_OP_IN_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001642 case SRE_OP_IN_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001643 GET_SKIP;
1644 /* Stop 1 before the end; we check the FAILURE below */
1645 if (!_validate_charset(code, code+skip-2))
1646 FAIL;
1647 if (code[skip-2] != SRE_OP_FAILURE)
1648 FAIL;
1649 code += skip-1;
1650 break;
1651
1652 case SRE_OP_INFO:
1653 {
1654 /* A minimal info field is
1655 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1656 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1657 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001658 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001659 SRE_CODE *newcode;
1660 GET_SKIP;
1661 newcode = code+skip-1;
1662 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001663 GET_ARG;
1664 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001665 /* Check that only valid flags are present */
1666 if ((flags & ~(SRE_INFO_PREFIX |
1667 SRE_INFO_LITERAL |
1668 SRE_INFO_CHARSET)) != 0)
1669 FAIL;
1670 /* PREFIX and CHARSET are mutually exclusive */
1671 if ((flags & SRE_INFO_PREFIX) &&
1672 (flags & SRE_INFO_CHARSET))
1673 FAIL;
1674 /* LITERAL implies PREFIX */
1675 if ((flags & SRE_INFO_LITERAL) &&
1676 !(flags & SRE_INFO_PREFIX))
1677 FAIL;
1678 /* Validate the prefix */
1679 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001680 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001681 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001682 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001683 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001684 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001685 FAIL;
1686 code += prefix_len;
1687 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001688 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001689 FAIL;
1690 /* Each overlap value should be < prefix_len */
1691 for (i = 0; i < prefix_len; i++) {
1692 if (code[i] >= prefix_len)
1693 FAIL;
1694 }
1695 code += prefix_len;
1696 }
1697 /* Validate the charset */
1698 if (flags & SRE_INFO_CHARSET) {
1699 if (!_validate_charset(code, newcode-1))
1700 FAIL;
1701 if (newcode[-1] != SRE_OP_FAILURE)
1702 FAIL;
1703 code = newcode;
1704 }
1705 else if (code != newcode) {
1706 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1707 FAIL;
1708 }
1709 }
1710 break;
1711
1712 case SRE_OP_BRANCH:
1713 {
1714 SRE_CODE *target = NULL;
1715 for (;;) {
1716 GET_SKIP;
1717 if (skip == 0)
1718 break;
1719 /* Stop 2 before the end; we check the JUMP below */
1720 if (!_validate_inner(code, code+skip-3, groups))
1721 FAIL;
1722 code += skip-3;
1723 /* Check that it ends with a JUMP, and that each JUMP
1724 has the same target */
1725 GET_OP;
1726 if (op != SRE_OP_JUMP)
1727 FAIL;
1728 GET_SKIP;
1729 if (target == NULL)
1730 target = code+skip-1;
1731 else if (code+skip-1 != target)
1732 FAIL;
1733 }
1734 }
1735 break;
1736
1737 case SRE_OP_REPEAT_ONE:
1738 case SRE_OP_MIN_REPEAT_ONE:
1739 {
1740 SRE_CODE min, max;
1741 GET_SKIP;
1742 GET_ARG; min = arg;
1743 GET_ARG; max = arg;
1744 if (min > max)
1745 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001746 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001747 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001748 if (!_validate_inner(code, code+skip-4, groups))
1749 FAIL;
1750 code += skip-4;
1751 GET_OP;
1752 if (op != SRE_OP_SUCCESS)
1753 FAIL;
1754 }
1755 break;
1756
1757 case SRE_OP_REPEAT:
1758 {
1759 SRE_CODE min, max;
1760 GET_SKIP;
1761 GET_ARG; min = arg;
1762 GET_ARG; max = arg;
1763 if (min > max)
1764 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001765 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001766 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001767 if (!_validate_inner(code, code+skip-3, groups))
1768 FAIL;
1769 code += skip-3;
1770 GET_OP;
1771 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1772 FAIL;
1773 }
1774 break;
1775
1776 case SRE_OP_GROUPREF:
1777 case SRE_OP_GROUPREF_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001778 case SRE_OP_GROUPREF_UNI_IGNORE:
1779 case SRE_OP_GROUPREF_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001780 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001781 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001782 FAIL;
1783 break;
1784
1785 case SRE_OP_GROUPREF_EXISTS:
1786 /* The regex syntax for this is: '(?(group)then|else)', where
1787 'group' is either an integer group number or a group name,
1788 'then' and 'else' are sub-regexes, and 'else' is optional. */
1789 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001790 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001791 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001792 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001793 code--; /* The skip is relative to the first arg! */
1794 /* There are two possibilities here: if there is both a 'then'
1795 part and an 'else' part, the generated code looks like:
1796
1797 GROUPREF_EXISTS
1798 <group>
1799 <skipyes>
1800 ...then part...
1801 JUMP
1802 <skipno>
1803 (<skipyes> jumps here)
1804 ...else part...
1805 (<skipno> jumps here)
1806
1807 If there is only a 'then' part, it looks like:
1808
1809 GROUPREF_EXISTS
1810 <group>
1811 <skip>
1812 ...then part...
1813 (<skip> jumps here)
1814
1815 There is no direct way to decide which it is, and we don't want
1816 to allow arbitrary jumps anywhere in the code; so we just look
1817 for a JUMP opcode preceding our skip target.
1818 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001819 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001820 code[skip-3] == SRE_OP_JUMP)
1821 {
1822 VTRACE(("both then and else parts present\n"));
1823 if (!_validate_inner(code+1, code+skip-3, groups))
1824 FAIL;
1825 code += skip-2; /* Position after JUMP, at <skipno> */
1826 GET_SKIP;
1827 if (!_validate_inner(code, code+skip-1, groups))
1828 FAIL;
1829 code += skip-1;
1830 }
1831 else {
1832 VTRACE(("only a then part present\n"));
1833 if (!_validate_inner(code+1, code+skip-1, groups))
1834 FAIL;
1835 code += skip-1;
1836 }
1837 break;
1838
1839 case SRE_OP_ASSERT:
1840 case SRE_OP_ASSERT_NOT:
1841 GET_SKIP;
1842 GET_ARG; /* 0 for lookahead, width for lookbehind */
1843 code--; /* Back up over arg to simplify math below */
1844 if (arg & 0x80000000)
1845 FAIL; /* Width too large */
1846 /* Stop 1 before the end; we check the SUCCESS below */
1847 if (!_validate_inner(code+1, code+skip-2, groups))
1848 FAIL;
1849 code += skip-2;
1850 GET_OP;
1851 if (op != SRE_OP_SUCCESS)
1852 FAIL;
1853 break;
1854
1855 default:
1856 FAIL;
1857
1858 }
1859 }
1860
1861 VTRACE(("okay\n"));
1862 return 1;
1863}
1864
1865static int
1866_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1867{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001868 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1869 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001870 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001871 return _validate_inner(code, end-1, groups);
1872}
1873
1874static int
1875_validate(PatternObject *self)
1876{
1877 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1878 {
1879 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1880 return 0;
1881 }
1882 else
1883 VTRACE(("Success!\n"));
1884 return 1;
1885}
1886
1887/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001888/* match methods */
1889
1890static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001891match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001892{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001893 Py_XDECREF(self->regs);
1894 Py_XDECREF(self->string);
1895 Py_DECREF(self->pattern);
1896 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001897}
1898
1899static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001900match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001901{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001902 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001903 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001904 Py_buffer view;
1905 PyObject *result;
1906 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001907 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001908
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001909 if (index < 0 || index >= self->groups) {
1910 /* raise IndexError if we were given a bad group number */
1911 PyErr_SetString(
1912 PyExc_IndexError,
1913 "no such group"
1914 );
1915 return NULL;
1916 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001917
Fredrik Lundh6f013982000-07-03 18:44:21 +00001918 index *= 2;
1919
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001920 if (self->string == Py_None || self->mark[index] < 0) {
1921 /* return default value if the string or group is undefined */
1922 Py_INCREF(def);
1923 return def;
1924 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001925
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001926 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001927 if (ptr == NULL)
1928 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001929
1930 i = self->mark[index];
1931 j = self->mark[index+1];
1932 i = Py_MIN(i, length);
1933 j = Py_MIN(j, length);
1934 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001935 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001936 PyBuffer_Release(&view);
1937 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001938}
1939
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001940static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001941match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001942{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001943 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001944
Guido van Rossumddefaf32007-01-14 03:31:43 +00001945 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001946 /* Default value */
1947 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001948
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001949 if (PyIndex_Check(index)) {
1950 return PyNumber_AsSsize_t(index, NULL);
1951 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001952
Fredrik Lundh6f013982000-07-03 18:44:21 +00001953 i = -1;
1954
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001955 if (self->pattern->groupindex) {
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001956 index = PyDict_GetItem(self->pattern->groupindex, index);
1957 if (index && PyLong_Check(index)) {
1958 i = PyLong_AsSsize_t(index);
1959 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001960 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001961
1962 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001963}
1964
1965static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001966match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001967{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001968 return match_getslice_by_index(self, match_getindex(self, index), def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001969}
1970
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001971/*[clinic input]
1972_sre.SRE_Match.expand
1973
1974 template: object
1975
1976Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1977[clinic start generated code]*/
1978
1979static PyObject *
1980_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1981/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001982{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001983 /* delegate to Python code */
1984 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001985 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001986 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001987 );
1988}
1989
1990static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001991match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001992{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001993 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001994 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001995
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001996 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001997
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001998 switch (size) {
1999 case 0:
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002000 result = match_getslice(self, _PyLong_Zero, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002001 break;
2002 case 1:
2003 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2004 break;
2005 default:
2006 /* fetch multiple items */
2007 result = PyTuple_New(size);
2008 if (!result)
2009 return NULL;
2010 for (i = 0; i < size; i++) {
2011 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002012 self, PyTuple_GET_ITEM(args, i), Py_None
2013 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002014 if (!item) {
2015 Py_DECREF(result);
2016 return NULL;
2017 }
2018 PyTuple_SET_ITEM(result, i, item);
2019 }
2020 break;
2021 }
2022 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002023}
2024
Eric V. Smith605bdae2016-09-11 08:55:43 -04002025static PyObject*
2026match_getitem(MatchObject* self, PyObject* name)
2027{
2028 return match_getslice(self, name, Py_None);
2029}
2030
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002031/*[clinic input]
2032_sre.SRE_Match.groups
2033
2034 default: object = None
2035 Is used for groups that did not participate in the match.
2036
2037Return a tuple containing all the subgroups of the match, from 1.
2038[clinic start generated code]*/
2039
2040static PyObject *
2041_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2042/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002043{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002044 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002045 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002046
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002047 result = PyTuple_New(self->groups-1);
2048 if (!result)
2049 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002050
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002051 for (index = 1; index < self->groups; index++) {
2052 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002053 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002054 if (!item) {
2055 Py_DECREF(result);
2056 return NULL;
2057 }
2058 PyTuple_SET_ITEM(result, index-1, item);
2059 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002060
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002061 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002062}
2063
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002064/*[clinic input]
2065_sre.SRE_Match.groupdict
2066
2067 default: object = None
2068 Is used for groups that did not participate in the match.
2069
2070Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2071[clinic start generated code]*/
2072
2073static PyObject *
2074_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2075/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002076{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002077 PyObject *result;
2078 PyObject *key;
2079 PyObject *value;
2080 Py_ssize_t pos = 0;
2081 Py_hash_t hash;
Guido van Rossumb700df92000-03-31 14:59:30 +00002082
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002083 result = PyDict_New();
2084 if (!result || !self->pattern->groupindex)
2085 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002086
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002087 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002088 int status;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002089 Py_INCREF(key);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002090 value = match_getslice(self, key, default_value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002091 if (!value) {
2092 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002093 goto failed;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002094 }
2095 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002096 Py_DECREF(value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002097 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002098 if (status < 0)
2099 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002100 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002101
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002102 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002103
2104failed:
Fredrik Lundh770617b2001-01-14 15:06:11 +00002105 Py_DECREF(result);
2106 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002107}
2108
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002109/*[clinic input]
2110_sre.SRE_Match.start -> Py_ssize_t
2111
2112 group: object(c_default="NULL") = 0
2113 /
2114
2115Return index of the start of the substring matched by group.
2116[clinic start generated code]*/
2117
2118static Py_ssize_t
2119_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2120/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002121{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002122 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002123
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002124 if (index < 0 || index >= self->groups) {
2125 PyErr_SetString(
2126 PyExc_IndexError,
2127 "no such group"
2128 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002129 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002130 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002131
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002132 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002133 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002134}
2135
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002136/*[clinic input]
2137_sre.SRE_Match.end -> Py_ssize_t
2138
2139 group: object(c_default="NULL") = 0
2140 /
2141
2142Return index of the end of the substring matched by group.
2143[clinic start generated code]*/
2144
2145static Py_ssize_t
2146_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2147/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002148{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002149 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002150
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002151 if (index < 0 || index >= self->groups) {
2152 PyErr_SetString(
2153 PyExc_IndexError,
2154 "no such group"
2155 );
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002156 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002157 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002158
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002159 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002160 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002161}
2162
2163LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002164_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002165{
2166 PyObject* pair;
2167 PyObject* item;
2168
2169 pair = PyTuple_New(2);
2170 if (!pair)
2171 return NULL;
2172
Christian Heimes217cfd12007-12-02 14:31:20 +00002173 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002174 if (!item)
2175 goto error;
2176 PyTuple_SET_ITEM(pair, 0, item);
2177
Christian Heimes217cfd12007-12-02 14:31:20 +00002178 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002179 if (!item)
2180 goto error;
2181 PyTuple_SET_ITEM(pair, 1, item);
2182
2183 return pair;
2184
2185 error:
2186 Py_DECREF(pair);
2187 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002188}
2189
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002190/*[clinic input]
2191_sre.SRE_Match.span
2192
2193 group: object(c_default="NULL") = 0
2194 /
2195
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002196For match object m, return the 2-tuple (m.start(group), m.end(group)).
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002197[clinic start generated code]*/
2198
2199static PyObject *
2200_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002201/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002202{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002203 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002204
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002205 if (index < 0 || index >= self->groups) {
2206 PyErr_SetString(
2207 PyExc_IndexError,
2208 "no such group"
2209 );
2210 return NULL;
2211 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002212
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002213 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002214 return _pair(self->mark[index*2], self->mark[index*2+1]);
2215}
2216
2217static PyObject*
2218match_regs(MatchObject* self)
2219{
2220 PyObject* regs;
2221 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002222 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002223
2224 regs = PyTuple_New(self->groups);
2225 if (!regs)
2226 return NULL;
2227
2228 for (index = 0; index < self->groups; index++) {
2229 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2230 if (!item) {
2231 Py_DECREF(regs);
2232 return NULL;
2233 }
2234 PyTuple_SET_ITEM(regs, index, item);
2235 }
2236
2237 Py_INCREF(regs);
2238 self->regs = regs;
2239
2240 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002241}
2242
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002243/*[clinic input]
2244_sre.SRE_Match.__copy__
2245
2246[clinic start generated code]*/
2247
2248static PyObject *
2249_sre_SRE_Match___copy___impl(MatchObject *self)
2250/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002251{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002252 Py_INCREF(self);
2253 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002254}
2255
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002256/*[clinic input]
2257_sre.SRE_Match.__deepcopy__
2258
2259 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002260 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002261
2262[clinic start generated code]*/
2263
2264static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002265_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2266/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002267{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002268 Py_INCREF(self);
2269 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002270}
2271
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002272PyDoc_STRVAR(match_doc,
2273"The result of re.match() and re.search().\n\
2274Match objects always have a boolean value of True.");
2275
2276PyDoc_STRVAR(match_group_doc,
Andrew Svetlov70dcef42012-12-23 19:59:27 +02002277"group([group1, ...]) -> str or tuple.\n\
Andrew Svetlov56ad5ed2012-12-23 19:23:07 +02002278 Return subgroup(s) of the match by indices or names.\n\
2279 For 0 returns the entire match.");
2280
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002281static PyObject *
2282match_lastindex_get(MatchObject *self)
Guido van Rossumb700df92000-03-31 14:59:30 +00002283{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002284 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002285 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002286 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002287}
2288
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002289static PyObject *
2290match_lastgroup_get(MatchObject *self)
2291{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002292 if (self->pattern->indexgroup &&
2293 self->lastindex >= 0 &&
2294 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2295 {
2296 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2297 self->lastindex);
2298 Py_INCREF(result);
2299 return result;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002300 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002301 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002302}
2303
2304static PyObject *
2305match_regs_get(MatchObject *self)
2306{
2307 if (self->regs) {
2308 Py_INCREF(self->regs);
2309 return self->regs;
2310 } else
2311 return match_regs(self);
2312}
2313
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002314static PyObject *
2315match_repr(MatchObject *self)
2316{
2317 PyObject *result;
2318 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2319 if (group0 == NULL)
2320 return NULL;
2321 result = PyUnicode_FromFormat(
2322 "<%s object; span=(%d, %d), match=%.50R>",
2323 Py_TYPE(self)->tp_name,
2324 self->mark[0], self->mark[1], group0);
2325 Py_DECREF(group0);
2326 return result;
2327}
2328
2329
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002330static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002331pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002332{
2333 /* create match object (from state object) */
2334
2335 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002336 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002337 char* base;
2338 int n;
2339
2340 if (status > 0) {
2341
2342 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002343 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002344 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2345 2*(pattern->groups+1));
2346 if (!match)
2347 return NULL;
2348
2349 Py_INCREF(pattern);
2350 match->pattern = pattern;
2351
2352 Py_INCREF(state->string);
2353 match->string = state->string;
2354
2355 match->regs = NULL;
2356 match->groups = pattern->groups+1;
2357
2358 /* fill in group slices */
2359
2360 base = (char*) state->beginning;
2361 n = state->charsize;
2362
2363 match->mark[0] = ((char*) state->start - base) / n;
2364 match->mark[1] = ((char*) state->ptr - base) / n;
2365
2366 for (i = j = 0; i < pattern->groups; i++, j+=2)
2367 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2368 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2369 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2370 } else
2371 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2372
2373 match->pos = state->pos;
2374 match->endpos = state->endpos;
2375
2376 match->lastindex = state->lastindex;
2377
2378 return (PyObject*) match;
2379
2380 } else if (status == 0) {
2381
2382 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002383 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002384
2385 }
2386
2387 /* internal error */
2388 pattern_error(status);
2389 return NULL;
2390}
2391
2392
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002393/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002394/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002395
2396static void
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002397scanner_dealloc(ScannerObject* self)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002398{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002399 state_fini(&self->state);
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00002400 Py_XDECREF(self->pattern);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002401 PyObject_DEL(self);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002402}
2403
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002404/*[clinic input]
2405_sre.SRE_Scanner.match
2406
2407[clinic start generated code]*/
2408
2409static PyObject *
2410_sre_SRE_Scanner_match_impl(ScannerObject *self)
2411/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002412{
2413 SRE_STATE* state = &self->state;
2414 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002415 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002416
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002417 if (state->start == NULL)
2418 Py_RETURN_NONE;
2419
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002420 state_reset(state);
2421
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002422 state->ptr = state->start;
2423
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002424 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002425 if (PyErr_Occurred())
2426 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002427
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002428 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002429 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002430
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002431 if (status == 0)
2432 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002433 else {
2434 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002435 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002436 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002437
2438 return match;
2439}
2440
2441
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002442/*[clinic input]
2443_sre.SRE_Scanner.search
2444
2445[clinic start generated code]*/
2446
2447static PyObject *
2448_sre_SRE_Scanner_search_impl(ScannerObject *self)
2449/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002450{
2451 SRE_STATE* state = &self->state;
2452 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002453 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002454
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002455 if (state->start == NULL)
2456 Py_RETURN_NONE;
2457
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002458 state_reset(state);
2459
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002460 state->ptr = state->start;
2461
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002462 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002463 if (PyErr_Occurred())
2464 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002465
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002466 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002467 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002468
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002469 if (status == 0)
2470 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002471 else {
2472 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002473 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002474 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002475
2476 return match;
2477}
2478
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002479static PyObject *
2480pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002481{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002482 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002483
2484 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002485 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2486 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002487 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002488 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002489
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002490 /* create search state object */
2491 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2492 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002493 return NULL;
2494 }
2495
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002496 Py_INCREF(self);
2497 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002498
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002499 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002500}
2501
Victor Stinnerb44fb122016-11-21 16:35:08 +01002502static Py_hash_t
2503pattern_hash(PatternObject *self)
2504{
2505 Py_hash_t hash, hash2;
2506
2507 hash = PyObject_Hash(self->pattern);
2508 if (hash == -1) {
2509 return -1;
2510 }
2511
2512 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2513 hash ^= hash2;
2514
2515 hash ^= self->flags;
2516 hash ^= self->isbytes;
2517 hash ^= self->codesize;
2518
2519 if (hash == -1) {
2520 hash = -2;
2521 }
2522 return hash;
2523}
2524
2525static PyObject*
2526pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2527{
2528 PatternObject *left, *right;
2529 int cmp;
2530
2531 if (op != Py_EQ && op != Py_NE) {
2532 Py_RETURN_NOTIMPLEMENTED;
2533 }
2534
2535 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2536 Py_RETURN_NOTIMPLEMENTED;
2537 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002538
2539 if (lefto == righto) {
2540 /* a pattern is equal to itself */
2541 return PyBool_FromLong(op == Py_EQ);
2542 }
2543
Victor Stinnerb44fb122016-11-21 16:35:08 +01002544 left = (PatternObject *)lefto;
2545 right = (PatternObject *)righto;
2546
2547 cmp = (left->flags == right->flags
2548 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002549 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002550 if (cmp) {
2551 /* Compare the code and the pattern because the same pattern can
2552 produce different codes depending on the locale used to compile the
2553 pattern when the re.LOCALE flag is used. Don't compare groups,
2554 indexgroup nor groupindex: they are derivated from the pattern. */
2555 cmp = (memcmp(left->code, right->code,
2556 sizeof(left->code[0]) * left->codesize) == 0);
2557 }
2558 if (cmp) {
2559 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2560 Py_EQ);
2561 if (cmp < 0) {
2562 return NULL;
2563 }
2564 }
2565 if (op == Py_NE) {
2566 cmp = !cmp;
2567 }
2568 return PyBool_FromLong(cmp);
2569}
2570
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002571#include "clinic/_sre.c.h"
2572
2573static PyMethodDef pattern_methods[] = {
2574 _SRE_SRE_PATTERN_MATCH_METHODDEF
2575 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2576 _SRE_SRE_PATTERN_SEARCH_METHODDEF
2577 _SRE_SRE_PATTERN_SUB_METHODDEF
2578 _SRE_SRE_PATTERN_SUBN_METHODDEF
2579 _SRE_SRE_PATTERN_FINDALL_METHODDEF
2580 _SRE_SRE_PATTERN_SPLIT_METHODDEF
2581 _SRE_SRE_PATTERN_FINDITER_METHODDEF
2582 _SRE_SRE_PATTERN_SCANNER_METHODDEF
2583 _SRE_SRE_PATTERN___COPY___METHODDEF
2584 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2585 {NULL, NULL}
2586};
2587
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002588static PyGetSetDef pattern_getset[] = {
2589 {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2590 "A dictionary mapping group names to group numbers."},
2591 {NULL} /* Sentinel */
2592};
2593
2594#define PAT_OFF(x) offsetof(PatternObject, x)
2595static PyMemberDef pattern_members[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002596 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
2597 "The pattern string from which the RE object was compiled."},
2598 {"flags", T_INT, PAT_OFF(flags), READONLY,
2599 "The regex matching flags."},
2600 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
2601 "The number of capturing groups in the pattern."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002602 {NULL} /* Sentinel */
2603};
2604
2605static PyTypeObject Pattern_Type = {
2606 PyVarObject_HEAD_INIT(NULL, 0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002607 "re.Pattern",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002608 sizeof(PatternObject), sizeof(SRE_CODE),
2609 (destructor)pattern_dealloc, /* tp_dealloc */
2610 0, /* tp_print */
2611 0, /* tp_getattr */
2612 0, /* tp_setattr */
2613 0, /* tp_reserved */
2614 (reprfunc)pattern_repr, /* tp_repr */
2615 0, /* tp_as_number */
2616 0, /* tp_as_sequence */
2617 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002618 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002619 0, /* tp_call */
2620 0, /* tp_str */
2621 0, /* tp_getattro */
2622 0, /* tp_setattro */
2623 0, /* tp_as_buffer */
2624 Py_TPFLAGS_DEFAULT, /* tp_flags */
2625 pattern_doc, /* tp_doc */
2626 0, /* tp_traverse */
2627 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002628 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002629 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2630 0, /* tp_iter */
2631 0, /* tp_iternext */
2632 pattern_methods, /* tp_methods */
2633 pattern_members, /* tp_members */
2634 pattern_getset, /* tp_getset */
2635};
2636
Eric V. Smith605bdae2016-09-11 08:55:43 -04002637/* Match objects do not support length or assignment, but do support
2638 __getitem__. */
2639static PyMappingMethods match_as_mapping = {
2640 NULL,
2641 (binaryfunc)match_getitem,
2642 NULL
2643};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002644
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002645static PyMethodDef match_methods[] = {
2646 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2647 _SRE_SRE_MATCH_START_METHODDEF
2648 _SRE_SRE_MATCH_END_METHODDEF
2649 _SRE_SRE_MATCH_SPAN_METHODDEF
2650 _SRE_SRE_MATCH_GROUPS_METHODDEF
2651 _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2652 _SRE_SRE_MATCH_EXPAND_METHODDEF
2653 _SRE_SRE_MATCH___COPY___METHODDEF
2654 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2655 {NULL, NULL}
2656};
2657
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002658static PyGetSetDef match_getset[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002659 {"lastindex", (getter)match_lastindex_get, (setter)NULL,
2660 "The integer index of the last matched capturing group."},
2661 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
2662 "The name of the last matched capturing group."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002663 {"regs", (getter)match_regs_get, (setter)NULL},
2664 {NULL}
2665};
2666
2667#define MATCH_OFF(x) offsetof(MatchObject, x)
2668static PyMemberDef match_members[] = {
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002669 {"string", T_OBJECT, MATCH_OFF(string), READONLY,
2670 "The string passed to match() or search()."},
2671 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
2672 "The regular expression object."},
2673 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
2674 "The index into the string at which the RE engine started looking for a match."},
2675 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
2676 "The index into the string beyond which the RE engine will not go."},
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002677 {NULL}
2678};
2679
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2682
2683static PyTypeObject Match_Type = {
2684 PyVarObject_HEAD_INIT(NULL,0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002685 "re.Match",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002686 sizeof(MatchObject), sizeof(Py_ssize_t),
2687 (destructor)match_dealloc, /* tp_dealloc */
2688 0, /* tp_print */
2689 0, /* tp_getattr */
2690 0, /* tp_setattr */
2691 0, /* tp_reserved */
2692 (reprfunc)match_repr, /* tp_repr */
2693 0, /* tp_as_number */
2694 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002695 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002696 0, /* tp_hash */
2697 0, /* tp_call */
2698 0, /* tp_str */
2699 0, /* tp_getattro */
2700 0, /* tp_setattro */
2701 0, /* tp_as_buffer */
2702 Py_TPFLAGS_DEFAULT, /* tp_flags */
2703 match_doc, /* tp_doc */
2704 0, /* tp_traverse */
2705 0, /* tp_clear */
2706 0, /* tp_richcompare */
2707 0, /* tp_weaklistoffset */
2708 0, /* tp_iter */
2709 0, /* tp_iternext */
2710 match_methods, /* tp_methods */
2711 match_members, /* tp_members */
2712 match_getset, /* tp_getset */
2713};
2714
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002715static PyMethodDef scanner_methods[] = {
2716 _SRE_SRE_SCANNER_MATCH_METHODDEF
2717 _SRE_SRE_SCANNER_SEARCH_METHODDEF
2718 {NULL, NULL}
2719};
2720
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002721#define SCAN_OFF(x) offsetof(ScannerObject, x)
2722static PyMemberDef scanner_members[] = {
2723 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2724 {NULL} /* Sentinel */
2725};
2726
2727static PyTypeObject Scanner_Type = {
2728 PyVarObject_HEAD_INIT(NULL, 0)
2729 "_" SRE_MODULE ".SRE_Scanner",
2730 sizeof(ScannerObject), 0,
2731 (destructor)scanner_dealloc,/* tp_dealloc */
2732 0, /* tp_print */
2733 0, /* tp_getattr */
2734 0, /* tp_setattr */
2735 0, /* tp_reserved */
2736 0, /* tp_repr */
2737 0, /* tp_as_number */
2738 0, /* tp_as_sequence */
2739 0, /* tp_as_mapping */
2740 0, /* tp_hash */
2741 0, /* tp_call */
2742 0, /* tp_str */
2743 0, /* tp_getattro */
2744 0, /* tp_setattro */
2745 0, /* tp_as_buffer */
2746 Py_TPFLAGS_DEFAULT, /* tp_flags */
2747 0, /* tp_doc */
2748 0, /* tp_traverse */
2749 0, /* tp_clear */
2750 0, /* tp_richcompare */
2751 0, /* tp_weaklistoffset */
2752 0, /* tp_iter */
2753 0, /* tp_iternext */
2754 scanner_methods, /* tp_methods */
2755 scanner_members, /* tp_members */
2756 0, /* tp_getset */
2757};
2758
Guido van Rossumb700df92000-03-31 14:59:30 +00002759static PyMethodDef _functions[] = {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002760 _SRE_COMPILE_METHODDEF
2761 _SRE_GETCODESIZE_METHODDEF
Serhiy Storchaka6d336a02017-05-09 23:37:14 +03002762 _SRE_ASCII_ISCASED_METHODDEF
2763 _SRE_UNICODE_ISCASED_METHODDEF
Serhiy Storchaka7186cc22017-05-05 10:42:46 +03002764 _SRE_ASCII_TOLOWER_METHODDEF
2765 _SRE_UNICODE_TOLOWER_METHODDEF
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002766 {NULL, NULL}
Guido van Rossumb700df92000-03-31 14:59:30 +00002767};
2768
Martin v. Löwis1a214512008-06-11 05:26:20 +00002769static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002770 PyModuleDef_HEAD_INIT,
2771 "_" SRE_MODULE,
2772 NULL,
2773 -1,
2774 _functions,
2775 NULL,
2776 NULL,
2777 NULL,
2778 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002779};
2780
2781PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002782{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002783 PyObject* m;
2784 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002785 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002786
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002787 /* Patch object types */
2788 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2789 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002790 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002791
Martin v. Löwis1a214512008-06-11 05:26:20 +00002792 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002793 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002794 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002795 d = PyModule_GetDict(m);
2796
Christian Heimes217cfd12007-12-02 14:31:20 +00002797 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002798 if (x) {
2799 PyDict_SetItemString(d, "MAGIC", x);
2800 Py_DECREF(x);
2801 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002802
Christian Heimes217cfd12007-12-02 14:31:20 +00002803 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002804 if (x) {
2805 PyDict_SetItemString(d, "CODESIZE", x);
2806 Py_DECREF(x);
2807 }
2808
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002809 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2810 if (x) {
2811 PyDict_SetItemString(d, "MAXREPEAT", x);
2812 Py_DECREF(x);
2813 }
2814
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002815 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2816 if (x) {
2817 PyDict_SetItemString(d, "MAXGROUPS", x);
2818 Py_DECREF(x);
2819 }
2820
Neal Norwitzfe537132007-08-26 03:55:15 +00002821 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002822 if (x) {
2823 PyDict_SetItemString(d, "copyright", x);
2824 Py_DECREF(x);
2825 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002826 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002827}
2828
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002829/* vim:ts=4:sw=4:et
2830*/