blob: d4fe588cbe27c601cbd8ba4138bedd3e6e4ec371 [file] [log] [blame]
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * partial history:
 * 1999-10-24 fl created (based on existing template matcher code)
 * 2000-03-06 fl first alpha, sort of
 * 2000-08-01 fl fixes for 1.6b1
 * 2000-08-07 fl use PyOS_CheckStack() if available
 * 2000-09-20 fl added expand method
 * 2001-03-20 fl lots of fixes for 2.1b2
 * 2001-04-15 fl export copyright as Python attribute, not global
 * 2001-04-28 fl added __copy__ methods (work in progress)
 * 2001-05-14 fl fixes for 1.5.2 compatibility
 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
 * 2001-10-21 fl added sub/subn primitive
 * 2001-10-24 fl added finditer primitive (for 2.2 only)
 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
 * 2002-11-09 fl fixed empty sub/subn return type
 * 2003-04-18 mvl fully support 4-byte codes
 * 2003-10-17 gn implemented non recursive scheme
 * 2013-02-04 mrab added fullmatch primitive
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * This version of the SRE library can be redistributed under CNRI's
 * Python 1.6 license. For any other use, please contact Secret Labs
 * AB (info@pythonware.com).
 *
 * Portions of this engine have been developed in cooperation with
 * CNRI. Hewlett-Packard provided funding for 1.6 integration and
 * other compatibility work.
 */
37
/* Version and copyright banner string for the SRE engine. */
static const char copyright[] =
    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
Guido van Rossumb700df92000-03-31 14:59:30 +000040
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041#define PY_SSIZE_T_CLEAN
42
Guido van Rossumb700df92000-03-31 14:59:30 +000043#include "Python.h"
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +000044#include "structmember.h" /* offsetof */
Guido van Rossumb700df92000-03-31 14:59:30 +000045
46#include "sre.h"
47
/* Number of bits in one SRE_CODE unit (8 bits per byte of sizeof). */
#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000050#include <ctype.h>
Guido van Rossumb700df92000-03-31 14:59:30 +000051
/* name of this module, minus the leading underscore; may be overridden
   by defining SRE_MODULE before this point (e.g. on the command line) */
#ifndef SRE_MODULE
#define SRE_MODULE "sre"
#endif

/* name of the Python-level module */
#define SRE_PY_MODULE "re"

/* defining this one enables tracing (see TRACE); it is forced off here */
#undef VERBOSE
Guido van Rossumb700df92000-03-31 14:59:30 +000061
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000062/* -------------------------------------------------------------------- */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000063
/* LOCAL(type) introduces a file-local, inlinable function returning
   `type`.  Usage: LOCAL(int) my_helper(...) { ... } */
#if defined(_MSC_VER)
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
/* fastest possible local call under MSVC */
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static inline type
#endif
72
/* error codes (all negative; parenthesized so they expand safely inside
   larger expressions) */
#define SRE_ERROR_ILLEGAL (-1) /* illegal opcode */
#define SRE_ERROR_STATE (-2) /* illegal state */
#define SRE_ERROR_RECURSION_LIMIT (-3) /* runaway recursion */
#define SRE_ERROR_MEMORY (-9) /* out of memory */
#define SRE_ERROR_INTERRUPTED (-10) /* signal handler raised exception */
Guido van Rossumb700df92000-03-31 14:59:30 +000079
/* TRACE((fmt, args...)) prints via printf when VERBOSE is defined and
   expands to nothing otherwise.  The argument must be a complete,
   parenthesized printf argument list; it is not evaluated when tracing
   is disabled. */
#if defined(VERBOSE)
#define TRACE(v) printf v
#else
#define TRACE(v)
#endif
85
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000086/* -------------------------------------------------------------------- */
87/* search engine state */
Guido van Rossumb700df92000-03-31 14:59:30 +000088
/* ASCII character predicates.  Each one first compares against an upper
   bound, so the Py_IS* macro is only evaluated (via &&) for code points
   at or below that bound. */
#define SRE_IS_DIGIT(ch)\
    ((ch) <= '9' && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
    ((ch) <= ' ' && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_IS_WORD(ch)\
    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
Guido van Rossumb700df92000-03-31 14:59:30 +000097
/* Lowercase `ch` using ASCII rules only; code points >= 128 are returned
   unchanged. */
static unsigned int sre_lower_ascii(unsigned int ch)
{
    return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
}
102
/* locale-specific character predicates */
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
 * warnings when c's type supports only numbers < N+1.
 * Values outside 0..255 are never passed to isalnum() and are
 * reported as non-alphanumeric. */
#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
108
/* Lowercase `ch` with the C library's tolower() (current locale); values
   >= 256 are returned unchanged without calling tolower(). */
static unsigned int sre_lower_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
113
/* Uppercase `ch` with the C library's toupper() (current locale); values
   >= 256 are returned unchanged without calling toupper(). */
static unsigned int sre_upper_locale(unsigned int ch)
{
    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
}
118
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000119/* unicode-specific character predicates */
120
/* Unicode predicates: thin aliases for the Py_UNICODE_* classification
   macros; a "word" character is an alphanumeric or an underscore. */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
Fredrik Lundhb25e1ad2001-03-22 15:50:10 +0000126
/* Lowercase `ch` using Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
131
/* Uppercase `ch` using Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    return (unsigned int) Py_UNICODE_TOUPPER(ch);
}
136
Guido van Rossumb700df92000-03-31 14:59:30 +0000137LOCAL(int)
138sre_category(SRE_CODE category, unsigned int ch)
139{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000140 switch (category) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000141
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000142 case SRE_CATEGORY_DIGIT:
143 return SRE_IS_DIGIT(ch);
144 case SRE_CATEGORY_NOT_DIGIT:
145 return !SRE_IS_DIGIT(ch);
146 case SRE_CATEGORY_SPACE:
147 return SRE_IS_SPACE(ch);
148 case SRE_CATEGORY_NOT_SPACE:
149 return !SRE_IS_SPACE(ch);
150 case SRE_CATEGORY_WORD:
151 return SRE_IS_WORD(ch);
152 case SRE_CATEGORY_NOT_WORD:
153 return !SRE_IS_WORD(ch);
154 case SRE_CATEGORY_LINEBREAK:
155 return SRE_IS_LINEBREAK(ch);
156 case SRE_CATEGORY_NOT_LINEBREAK:
157 return !SRE_IS_LINEBREAK(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000158
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000159 case SRE_CATEGORY_LOC_WORD:
160 return SRE_LOC_IS_WORD(ch);
161 case SRE_CATEGORY_LOC_NOT_WORD:
162 return !SRE_LOC_IS_WORD(ch);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000163
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000164 case SRE_CATEGORY_UNI_DIGIT:
165 return SRE_UNI_IS_DIGIT(ch);
166 case SRE_CATEGORY_UNI_NOT_DIGIT:
167 return !SRE_UNI_IS_DIGIT(ch);
168 case SRE_CATEGORY_UNI_SPACE:
169 return SRE_UNI_IS_SPACE(ch);
170 case SRE_CATEGORY_UNI_NOT_SPACE:
171 return !SRE_UNI_IS_SPACE(ch);
172 case SRE_CATEGORY_UNI_WORD:
173 return SRE_UNI_IS_WORD(ch);
174 case SRE_CATEGORY_UNI_NOT_WORD:
175 return !SRE_UNI_IS_WORD(ch);
176 case SRE_CATEGORY_UNI_LINEBREAK:
177 return SRE_UNI_IS_LINEBREAK(ch);
178 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
179 return !SRE_UNI_IS_LINEBREAK(ch);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000180 }
181 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000182}
183
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300184LOCAL(int)
185char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
186{
187 return ch == pattern
188 || (SRE_CODE) sre_lower_locale(ch) == pattern
189 || (SRE_CODE) sre_upper_locale(ch) == pattern;
190}
191
192
Guido van Rossumb700df92000-03-31 14:59:30 +0000193/* helpers */
194
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000195static void
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000196data_stack_dealloc(SRE_STATE* state)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000197{
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000198 if (state->data_stack) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000199 PyMem_FREE(state->data_stack);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000200 state->data_stack = NULL;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000201 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000202 state->data_stack_size = state->data_stack_base = 0;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000203}
204
205static int
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000206data_stack_grow(SRE_STATE* state, Py_ssize_t size)
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000207{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000208 Py_ssize_t minsize, cursize;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000209 minsize = state->data_stack_base+size;
210 cursize = state->data_stack_size;
211 if (cursize < minsize) {
212 void* stack;
213 cursize = minsize+minsize/4+1024;
Serhiy Storchaka134f0de2013-09-05 18:01:15 +0300214 TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
Thomas Wouters477c8d52006-05-27 19:21:47 +0000215 stack = PyMem_REALLOC(state->data_stack, cursize);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000216 if (!stack) {
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000217 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000218 return SRE_ERROR_MEMORY;
219 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000220 state->data_stack = (char *)stack;
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000221 state->data_stack_size = cursize;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000222 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000223 return 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000224}
225
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000226/* generate 8-bit version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000227
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300228#define SRE_CHAR Py_UCS1
229#define SIZEOF_SRE_CHAR 1
230#define SRE(F) sre_ucs1_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300231#include "sre_lib.h"
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000232
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300233/* generate 16-bit unicode version */
Guido van Rossumb700df92000-03-31 14:59:30 +0000234
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300235#define SRE_CHAR Py_UCS2
236#define SIZEOF_SRE_CHAR 2
237#define SRE(F) sre_ucs2_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300238#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000239
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300240/* generate 32-bit unicode version */
241
242#define SRE_CHAR Py_UCS4
243#define SIZEOF_SRE_CHAR 4
244#define SRE(F) sre_ucs4_##F
Serhiy Storchaka8444ebb2013-10-26 11:18:42 +0300245#include "sre_lib.h"
Guido van Rossumb700df92000-03-31 14:59:30 +0000246
247/* -------------------------------------------------------------------- */
248/* factories and destructors */
249
250/* see sre.h for object declarations */
Victor Stinnerf5587782013-11-15 23:21:11 +0100251static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300252static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
Guido van Rossumb700df92000-03-31 14:59:30 +0000253
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300254
255/*[clinic input]
256module _sre
257class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
258class _sre.SRE_Match "MatchObject *" "&Match_Type"
259class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
260[clinic start generated code]*/
261/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
262
Larry Hastings2d0a69a2015-05-03 14:49:19 -0700263static PyTypeObject Pattern_Type;
264static PyTypeObject Match_Type;
265static PyTypeObject Scanner_Type;
266
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300267/*[clinic input]
268_sre.getcodesize -> int
269[clinic start generated code]*/
270
271static int
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +0300272_sre_getcodesize_impl(PyObject *module)
273/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000274{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300275 return sizeof(SRE_CODE);
Guido van Rossumb700df92000-03-31 14:59:30 +0000276}
277
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300278/*[clinic input]
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300279_sre.ascii_iscased -> bool
280
281 character: int
282 /
283
284[clinic start generated code]*/
285
286static int
287_sre_ascii_iscased_impl(PyObject *module, int character)
288/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
289{
290 unsigned int ch = (unsigned int)character;
Sergey Fedoseev7f0d59f2018-09-12 17:49:09 +0500291 return ch < 128 && Py_ISALPHA(ch);
Serhiy Storchaka6d336a02017-05-09 23:37:14 +0300292}
293
294/*[clinic input]
295_sre.unicode_iscased -> bool
296
297 character: int
298 /
299
300[clinic start generated code]*/
301
302static int
303_sre_unicode_iscased_impl(PyObject *module, int character)
304/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
305{
306 unsigned int ch = (unsigned int)character;
307 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
308}
309
310/*[clinic input]
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300311_sre.ascii_tolower -> int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300312
313 character: int
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300314 /
315
316[clinic start generated code]*/
317
318static int
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300319_sre_ascii_tolower_impl(PyObject *module, int character)
320/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000321{
Serhiy Storchaka3557b052017-10-24 23:31:42 +0300322 return sre_lower_ascii(character);
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000323}
324
Serhiy Storchaka7186cc22017-05-05 10:42:46 +0300325/*[clinic input]
326_sre.unicode_tolower -> int
327
328 character: int
329 /
330
331[clinic start generated code]*/
332
333static int
334_sre_unicode_tolower_impl(PyObject *module, int character)
335/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
336{
337 return sre_lower_unicode(character);
338}
339
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000340LOCAL(void)
341state_reset(SRE_STATE* state)
342{
animalize4a7f44a2019-02-18 21:26:37 +0800343 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000344 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000345
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000346 state->lastmark = -1;
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000347 state->lastindex = -1;
348
349 state->repeat = NULL;
350
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000351 data_stack_dealloc(state);
Fredrik Lundh29c4ba92000-08-01 18:20:07 +0000352}
353
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000354static void*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200355getstring(PyObject* string, Py_ssize_t* p_length,
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300356 int* p_isbytes, int* p_charsize,
Benjamin Peterson33d21a22012-03-07 14:59:13 -0600357 Py_buffer *view)
Guido van Rossumb700df92000-03-31 14:59:30 +0000358{
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000359 /* given a python object, return a data pointer, a length (in
360 characters), and a character size. return NULL if the object
361 is not a string (or not compatible) */
Tim Peters3d563502006-01-21 02:47:53 +0000362
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000363 /* Unicode objects do not support the buffer API. So, get the data
364 directly instead. */
365 if (PyUnicode_Check(string)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 if (PyUnicode_READY(string) == -1)
367 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 *p_length = PyUnicode_GET_LENGTH(string);
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200369 *p_charsize = PyUnicode_KIND(string);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300370 *p_isbytes = 0;
371 return PyUnicode_DATA(string);
Alexandre Vassalotti70a23712007-10-14 02:05:51 +0000372 }
373
Victor Stinner0058b862011-09-29 03:27:47 +0200374 /* get pointer to byte string buffer */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300375 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200376 PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300377 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000378 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000379
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300380 *p_length = view->len;
381 *p_charsize = 1;
382 *p_isbytes = 1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000383
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300384 if (view->buf == NULL) {
385 PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
386 PyBuffer_Release(view);
387 view->buf = NULL;
388 return NULL;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000389 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300390 return view->buf;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000391}
392
393LOCAL(PyObject*)
394state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000395 Py_ssize_t start, Py_ssize_t end)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000396{
397 /* prepare state object */
398
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000399 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300400 int isbytes, charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000401 void* ptr;
402
403 memset(state, 0, sizeof(SRE_STATE));
404
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300405 state->mark = PyMem_New(void *, pattern->groups * 2);
406 if (!state->mark) {
407 PyErr_NoMemory();
408 goto err;
409 }
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000410 state->lastmark = -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000411 state->lastindex = -1;
412
Benjamin Petersone48944b2012-03-07 14:50:25 -0600413 state->buffer.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300414 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000415 if (!ptr)
Benjamin Petersone48944b2012-03-07 14:50:25 -0600416 goto err;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000417
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300418 if (isbytes && pattern->isbytes == 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600419 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200420 "cannot use a string pattern on a bytes-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600421 goto err;
422 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300423 if (!isbytes && pattern->isbytes > 0) {
Benjamin Petersone48944b2012-03-07 14:50:25 -0600424 PyErr_SetString(PyExc_TypeError,
Serhiy Storchaka632a77e2015-03-25 21:03:47 +0200425 "cannot use a bytes pattern on a string-like object");
Benjamin Petersone48944b2012-03-07 14:50:25 -0600426 goto err;
427 }
Antoine Pitroufd036452008-08-19 17:56:33 +0000428
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000429 /* adjust boundaries */
430 if (start < 0)
431 start = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000432 else if (start > length)
433 start = length;
Guido van Rossumb700df92000-03-31 14:59:30 +0000434
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000435 if (end < 0)
436 end = 0;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000437 else if (end > length)
438 end = length;
439
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300440 state->isbytes = isbytes;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000441 state->charsize = charsize;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200442 state->match_all = 0;
443 state->must_advance = 0;
Guido van Rossumb700df92000-03-31 14:59:30 +0000444
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000445 state->beginning = ptr;
Guido van Rossumb700df92000-03-31 14:59:30 +0000446
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000447 state->start = (void*) ((char*) ptr + start * state->charsize);
448 state->end = (void*) ((char*) ptr + end * state->charsize);
449
450 Py_INCREF(string);
451 state->string = string;
452 state->pos = start;
453 state->endpos = end;
Guido van Rossumb700df92000-03-31 14:59:30 +0000454
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000455 return string;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600456 err:
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300457 PyMem_Del(state->mark);
458 state->mark = NULL;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600459 if (state->buffer.buf)
460 PyBuffer_Release(&state->buffer);
461 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000462}
463
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000464LOCAL(void)
465state_fini(SRE_STATE* state)
466{
Benjamin Petersone48944b2012-03-07 14:50:25 -0600467 if (state->buffer.buf)
468 PyBuffer_Release(&state->buffer);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000469 Py_XDECREF(state->string);
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000470 data_stack_dealloc(state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300471 PyMem_Del(state->mark);
472 state->mark = NULL;
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000473}
474
/* calculate offset from start of string: the byte distance between
   `member` and (state)->beginning, divided by the character size, i.e.
   an offset in characters */
#define STATE_OFFSET(state, member)\
    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
478
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000479LOCAL(PyObject*)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300480getslice(int isbytes, const void *ptr,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300481 PyObject* string, Py_ssize_t start, Py_ssize_t end)
482{
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300483 if (isbytes) {
Serhiy Storchaka25324972013-10-16 12:46:28 +0300484 if (PyBytes_CheckExact(string) &&
485 start == 0 && end == PyBytes_GET_SIZE(string)) {
486 Py_INCREF(string);
487 return string;
488 }
489 return PyBytes_FromStringAndSize(
490 (const char *)ptr + start, end - start);
491 }
492 else {
493 return PyUnicode_Substring(string, start, end);
494 }
495}
496
497LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000498state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000499{
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000500 Py_ssize_t i, j;
Fredrik Lundh58100642000-08-09 09:14:35 +0000501
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000502 index = (index - 1) * 2;
503
Gustavo Niemeyerad3fc442003-10-17 22:13:16 +0000504 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000505 if (empty)
506 /* want empty string */
507 i = j = 0;
508 else {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200509 Py_RETURN_NONE;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000510 }
Fredrik Lundh58100642000-08-09 09:14:35 +0000511 } else {
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000512 i = STATE_OFFSET(state, state->mark[index]);
513 j = STATE_OFFSET(state, state->mark[index+1]);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000514 }
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000515
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300516 return getslice(state->isbytes, state->beginning, string, i, j);
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000517}
518
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000519static void
Victor Stinnerf5587782013-11-15 23:21:11 +0100520pattern_error(Py_ssize_t status)
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000521{
522 switch (status) {
523 case SRE_ERROR_RECURSION_LIMIT:
Yury Selivanovf488fb42015-07-03 01:04:23 -0400524 /* This error code seems to be unused. */
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000525 PyErr_SetString(
Yury Selivanovf488fb42015-07-03 01:04:23 -0400526 PyExc_RecursionError,
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000527 "maximum recursion limit exceeded"
528 );
529 break;
530 case SRE_ERROR_MEMORY:
531 PyErr_NoMemory();
532 break;
Christian Heimes2380ac72008-01-09 00:17:24 +0000533 case SRE_ERROR_INTERRUPTED:
534 /* An exception has already been raised, so let it fly */
535 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000536 default:
537 /* other error codes indicate compiler/engine bugs */
538 PyErr_SetString(
539 PyExc_RuntimeError,
540 "internal error in regular expression engine"
541 );
542 }
543}
544
Guido van Rossumb700df92000-03-31 14:59:30 +0000545static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000546pattern_dealloc(PatternObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +0000547{
Raymond Hettinger027bb632004-05-31 03:09:25 +0000548 if (self->weakreflist != NULL)
549 PyObject_ClearWeakRefs((PyObject *) self);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000550 Py_XDECREF(self->pattern);
551 Py_XDECREF(self->groupindex);
Fredrik Lundh6f5cba62001-01-16 07:05:29 +0000552 Py_XDECREF(self->indexgroup);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000553 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +0000554}
555
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300556LOCAL(Py_ssize_t)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200557sre_match(SRE_STATE* state, SRE_CODE* pattern)
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300558{
559 if (state->charsize == 1)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200560 return sre_ucs1_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300561 if (state->charsize == 2)
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200562 return sre_ucs2_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300563 assert(state->charsize == 4);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200564 return sre_ucs4_match(state, pattern, 1);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300565}
566
567LOCAL(Py_ssize_t)
568sre_search(SRE_STATE* state, SRE_CODE* pattern)
569{
570 if (state->charsize == 1)
571 return sre_ucs1_search(state, pattern);
572 if (state->charsize == 2)
573 return sre_ucs2_search(state, pattern);
574 assert(state->charsize == 4);
575 return sre_ucs4_search(state, pattern);
576}
577
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300578/*[clinic input]
579_sre.SRE_Pattern.match
580
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200581 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300582 pos: Py_ssize_t = 0
583 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300584
585Matches zero or more characters at the beginning of the string.
586[clinic start generated code]*/
587
Larry Hastings16c51912014-01-07 11:53:01 -0800588static PyObject *
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300589_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200590 Py_ssize_t pos, Py_ssize_t endpos)
591/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
Larry Hastings16c51912014-01-07 11:53:01 -0800592{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000593 SRE_STATE state;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100594 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300595 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000596
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300597 if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000598 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000599
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000600 state.ptr = state.start;
601
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000602 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
603
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200604 status = sre_match(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000605
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000606 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300607 if (PyErr_Occurred()) {
608 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000609 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300610 }
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000611
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300612 match = pattern_new_match(self, &state, status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000613 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300614 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000615}
616
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300617/*[clinic input]
618_sre.SRE_Pattern.fullmatch
619
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200620 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300621 pos: Py_ssize_t = 0
622 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300623
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300624Matches against all of the string.
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300625[clinic start generated code]*/
626
627static PyObject *
628_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200629 Py_ssize_t pos, Py_ssize_t endpos)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +0300630/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200631{
632 SRE_STATE state;
633 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300634 PyObject *match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200635
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300636 if (!state_init(&state, self, string, pos, endpos))
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200637 return NULL;
638
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200639 state.ptr = state.start;
640
641 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
642
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200643 state.match_all = 1;
644 status = sre_match(&state, PatternObject_GetCode(self));
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200645
646 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300647 if (PyErr_Occurred()) {
648 state_fini(&state);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200649 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300650 }
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200651
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300652 match = pattern_new_match(self, &state, status);
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200653 state_fini(&state);
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300654 return match;
Serhiy Storchaka32eddc12013-11-23 23:20:30 +0200655}
656
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300657/*[clinic input]
658_sre.SRE_Pattern.search
659
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200660 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300661 pos: Py_ssize_t = 0
662 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300663
664Scan through string looking for a match, and return a corresponding match object instance.
665
666Return None if no position in the string matches.
667[clinic start generated code]*/
668
669static PyObject *
670_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200671 Py_ssize_t pos, Py_ssize_t endpos)
672/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000673{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000674 SRE_STATE state;
Victor Stinnerf5587782013-11-15 23:21:11 +0100675 Py_ssize_t status;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300676 PyObject *match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000677
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300678 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000679 return NULL;
680
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000681 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
682
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300683 status = sre_search(&state, PatternObject_GetCode(self));
Guido van Rossumb700df92000-03-31 14:59:30 +0000684
Fredrik Lundh7898c3e2000-08-07 20:59:04 +0000685 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
686
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300687 if (PyErr_Occurred()) {
688 state_fini(&state);
Thomas Wouters89f507f2006-12-13 04:49:30 +0000689 return NULL;
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300690 }
Thomas Wouters89f507f2006-12-13 04:49:30 +0000691
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +0300692 match = pattern_new_match(self, &state, status);
693 state_fini(&state);
694 return match;
Guido van Rossumb700df92000-03-31 14:59:30 +0000695}
696
697static PyObject*
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200698call(const char* module, const char* function, PyObject* args)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000699{
700 PyObject* name;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000701 PyObject* mod;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000702 PyObject* func;
703 PyObject* result;
704
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000705 if (!args)
706 return NULL;
Neal Norwitzfe537132007-08-26 03:55:15 +0000707 name = PyUnicode_FromString(module);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000708 if (!name)
709 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000710 mod = PyImport_Import(name);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000711 Py_DECREF(name);
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000712 if (!mod)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000713 return NULL;
Fredrik Lundhd89a2e72001-07-03 20:32:36 +0000714 func = PyObject_GetAttrString(mod, function);
715 Py_DECREF(mod);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000716 if (!func)
717 return NULL;
718 result = PyObject_CallObject(func, args);
719 Py_DECREF(func);
720 Py_DECREF(args);
721 return result;
722}
723
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300724/*[clinic input]
725_sre.SRE_Pattern.findall
726
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200727 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300728 pos: Py_ssize_t = 0
729 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300730
731Return a list of all non-overlapping matches of pattern in string.
732[clinic start generated code]*/
733
734static PyObject *
735_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200736 Py_ssize_t pos, Py_ssize_t endpos)
737/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
Guido van Rossumb700df92000-03-31 14:59:30 +0000738{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000739 SRE_STATE state;
740 PyObject* list;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100741 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000742 Py_ssize_t i, b, e;
Guido van Rossumb700df92000-03-31 14:59:30 +0000743
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300744 if (!state_init(&state, self, string, pos, endpos))
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000745 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +0000746
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000747 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000748 if (!list) {
749 state_fini(&state);
750 return NULL;
751 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000752
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000753 while (state.start <= state.end) {
Guido van Rossumb700df92000-03-31 14:59:30 +0000754
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000755 PyObject* item;
Tim Peters3d563502006-01-21 02:47:53 +0000756
Fredrik Lundhebc37b22000-10-28 19:30:41 +0000757 state_reset(&state);
758
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000759 state.ptr = state.start;
760
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300761 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300762 if (PyErr_Occurred())
763 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000764
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000765 if (status <= 0) {
Fredrik Lundh436c3d582000-06-29 08:58:44 +0000766 if (status == 0)
767 break;
Fredrik Lundh96ab4652000-08-03 16:29:50 +0000768 pattern_error(status);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000769 goto error;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000770 }
Tim Peters3d563502006-01-21 02:47:53 +0000771
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000772 /* don't bother to build a match object */
773 switch (self->groups) {
774 case 0:
775 b = STATE_OFFSET(&state, state.start);
776 e = STATE_OFFSET(&state, state.ptr);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300777 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +0300778 string, b, e);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000779 if (!item)
780 goto error;
781 break;
782 case 1:
783 item = state_getslice(&state, 1, string, 1);
784 if (!item)
785 goto error;
786 break;
787 default:
788 item = PyTuple_New(self->groups);
789 if (!item)
790 goto error;
791 for (i = 0; i < self->groups; i++) {
792 PyObject* o = state_getslice(&state, i+1, string, 1);
793 if (!o) {
794 Py_DECREF(item);
795 goto error;
796 }
797 PyTuple_SET_ITEM(item, i, o);
798 }
799 break;
800 }
801
802 status = PyList_Append(list, item);
803 Py_DECREF(item);
804 if (status < 0)
805 goto error;
806
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +0200807 state.must_advance = (state.ptr == state.start);
808 state.start = state.ptr;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000809 }
Guido van Rossumb700df92000-03-31 14:59:30 +0000810
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000811 state_fini(&state);
812 return list;
Guido van Rossumb700df92000-03-31 14:59:30 +0000813
814error:
Fredrik Lundh75f2d672000-06-29 11:34:28 +0000815 Py_DECREF(list);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +0000816 state_fini(&state);
817 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000818
Guido van Rossumb700df92000-03-31 14:59:30 +0000819}
820
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300821/*[clinic input]
822_sre.SRE_Pattern.finditer
823
824 string: object
825 pos: Py_ssize_t = 0
826 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
827
828Return an iterator over all non-overlapping matches for the RE pattern in string.
829
830For each match, the iterator returns a match object.
831[clinic start generated code]*/
832
833static PyObject *
834_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
835 Py_ssize_t pos, Py_ssize_t endpos)
836/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
Fredrik Lundh703ce812001-10-24 22:16:30 +0000837{
838 PyObject* scanner;
839 PyObject* search;
840 PyObject* iterator;
841
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300842 scanner = pattern_scanner(self, string, pos, endpos);
Fredrik Lundh703ce812001-10-24 22:16:30 +0000843 if (!scanner)
844 return NULL;
845
846 search = PyObject_GetAttrString(scanner, "search");
847 Py_DECREF(scanner);
848 if (!search)
849 return NULL;
850
851 iterator = PyCallIter_New(search, Py_None);
852 Py_DECREF(search);
853
854 return iterator;
855}
Fredrik Lundh703ce812001-10-24 22:16:30 +0000856
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300857/*[clinic input]
858_sre.SRE_Pattern.scanner
859
860 string: object
861 pos: Py_ssize_t = 0
862 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
863
864[clinic start generated code]*/
865
866static PyObject *
867_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
868 Py_ssize_t pos, Py_ssize_t endpos)
869/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
870{
871 return pattern_scanner(self, string, pos, endpos);
872}
873
874/*[clinic input]
875_sre.SRE_Pattern.split
876
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200877 string: object
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300878 maxsplit: Py_ssize_t = 0
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300879
880Split string by the occurrences of pattern.
881[clinic start generated code]*/
882
883static PyObject *
884_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
Serhiy Storchakab37f3f62017-01-13 08:53:58 +0200885 Py_ssize_t maxsplit)
886/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000887{
888 SRE_STATE state;
889 PyObject* list;
890 PyObject* item;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100891 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000892 Py_ssize_t n;
893 Py_ssize_t i;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000894 void* last;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000895
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200896 assert(self->codesize != 0);
Serhiy Storchaka83e80272015-02-03 11:04:19 +0200897
Serhiy Storchakaa860aea2015-05-03 15:54:23 +0300898 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000899 return NULL;
900
901 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +0000902 if (!list) {
903 state_fini(&state);
904 return NULL;
905 }
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000906
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000907 n = 0;
908 last = state.start;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000909
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000910 while (!maxsplit || n < maxsplit) {
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000911
912 state_reset(&state);
913
914 state.ptr = state.start;
915
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300916 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +0300917 if (PyErr_Occurred())
918 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000919
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000920 if (status <= 0) {
921 if (status == 0)
922 break;
923 pattern_error(status);
924 goto error;
925 }
Tim Peters3d563502006-01-21 02:47:53 +0000926
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000927 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300928 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000929 string, STATE_OFFSET(&state, last),
930 STATE_OFFSET(&state, state.start)
931 );
932 if (!item)
933 goto error;
934 status = PyList_Append(list, item);
935 Py_DECREF(item);
936 if (status < 0)
937 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000938
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000939 /* add groups (if any) */
940 for (i = 0; i < self->groups; i++) {
941 item = state_getslice(&state, i+1, string, 0);
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000942 if (!item)
943 goto error;
944 status = PyList_Append(list, item);
945 Py_DECREF(item);
946 if (status < 0)
947 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000948 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000949
950 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +0200951 state.must_advance = (state.ptr == state.start);
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000952 last = state.start = state.ptr;
953
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000954 }
955
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000956 /* get segment following last match (even if empty) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300957 item = getslice(state.isbytes, state.beginning,
Fredrik Lundhf864aa82001-10-22 06:01:56 +0000958 string, STATE_OFFSET(&state, last), state.endpos
959 );
960 if (!item)
961 goto error;
962 status = PyList_Append(list, item);
963 Py_DECREF(item);
964 if (status < 0)
965 goto error;
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000966
967 state_fini(&state);
968 return list;
969
970error:
971 Py_DECREF(list);
972 state_fini(&state);
973 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +0000974
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000975}
Fredrik Lundh971e78b2001-10-20 17:48:46 +0000976
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000977static PyObject*
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000978pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000979 Py_ssize_t count, Py_ssize_t subn)
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000980{
981 SRE_STATE state;
982 PyObject* list;
Serhiy Storchaka25324972013-10-16 12:46:28 +0300983 PyObject* joiner;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000984 PyObject* item;
985 PyObject* filter;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000986 PyObject* match;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +0000987 void* ptr;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +0100988 Py_ssize_t status;
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000989 Py_ssize_t n;
990 Py_ssize_t i, b, e;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +0300991 int isbytes, charsize;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000992 int filter_is_callable;
Benjamin Petersone48944b2012-03-07 14:50:25 -0600993 Py_buffer view;
Fredrik Lundhbec95b92001-10-21 16:47:57 +0000994
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000995 if (PyCallable_Check(ptemplate)) {
Fredrik Lundhdac58492001-10-21 21:48:30 +0000996 /* sub/subn takes either a function or a template */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000997 filter = ptemplate;
Fredrik Lundhdac58492001-10-21 21:48:30 +0000998 Py_INCREF(filter);
999 filter_is_callable = 1;
1000 } else {
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001001 /* if not callable, check if it's a literal string */
1002 int literal;
Benjamin Petersone48944b2012-03-07 14:50:25 -06001003 view.buf = NULL;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001004 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 b = charsize;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001006 if (ptr) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001007 if (charsize == 1)
1008 literal = memchr(ptr, '\\', n) == NULL;
1009 else
1010 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001011 } else {
1012 PyErr_Clear();
1013 literal = 0;
1014 }
Benjamin Petersone48944b2012-03-07 14:50:25 -06001015 if (view.buf)
1016 PyBuffer_Release(&view);
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001017 if (literal) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001018 filter = ptemplate;
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001019 Py_INCREF(filter);
1020 filter_is_callable = 0;
1021 } else {
1022 /* not a literal; hand it over to the template compiler */
1023 filter = call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001024 SRE_PY_MODULE, "_subx",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001025 PyTuple_Pack(2, self, ptemplate)
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001026 );
1027 if (!filter)
1028 return NULL;
1029 filter_is_callable = PyCallable_Check(filter);
1030 }
Fredrik Lundhdac58492001-10-21 21:48:30 +00001031 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001032
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001033 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001034 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001035 return NULL;
Fredrik Lundh82b23072001-12-09 16:13:15 +00001036 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001037
1038 list = PyList_New(0);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001039 if (!list) {
Fredrik Lundh82b23072001-12-09 16:13:15 +00001040 Py_DECREF(filter);
Fredrik Lundh1296a8d2001-10-21 18:04:11 +00001041 state_fini(&state);
1042 return NULL;
1043 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001044
1045 n = i = 0;
1046
1047 while (!count || n < count) {
1048
1049 state_reset(&state);
1050
1051 state.ptr = state.start;
1052
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001053 status = sre_search(&state, PatternObject_GetCode(self));
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001054 if (PyErr_Occurred())
1055 goto error;
Thomas Wouters89f507f2006-12-13 04:49:30 +00001056
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001057 if (status <= 0) {
1058 if (status == 0)
1059 break;
1060 pattern_error(status);
1061 goto error;
1062 }
Tim Peters3d563502006-01-21 02:47:53 +00001063
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001064 b = STATE_OFFSET(&state, state.start);
1065 e = STATE_OFFSET(&state, state.ptr);
1066
1067 if (i < b) {
1068 /* get segment before this match */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001069 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001070 string, i, b);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001071 if (!item)
1072 goto error;
1073 status = PyList_Append(list, item);
1074 Py_DECREF(item);
1075 if (status < 0)
1076 goto error;
1077
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001078 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001079
1080 if (filter_is_callable) {
Fredrik Lundhdac58492001-10-21 21:48:30 +00001081 /* pass match object through filter */
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001082 match = pattern_new_match(self, &state, 1);
1083 if (!match)
1084 goto error;
Victor Stinner7bfb42d2016-12-05 17:04:32 +01001085 item = PyObject_CallFunctionObjArgs(filter, match, NULL);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001086 Py_DECREF(match);
1087 if (!item)
1088 goto error;
1089 } else {
1090 /* filter is literal string */
1091 item = filter;
Fredrik Lundhdac58492001-10-21 21:48:30 +00001092 Py_INCREF(item);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001093 }
1094
1095 /* add to list */
Fredrik Lundh6de22ef2001-10-22 21:18:08 +00001096 if (item != Py_None) {
1097 status = PyList_Append(list, item);
1098 Py_DECREF(item);
1099 if (status < 0)
1100 goto error;
1101 }
Tim Peters3d563502006-01-21 02:47:53 +00001102
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001103 i = e;
1104 n = n + 1;
Serhiy Storchakafbb490f2018-01-04 11:06:13 +02001105 state.must_advance = (state.ptr == state.start);
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02001106 state.start = state.ptr;
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001107 }
1108
1109 /* get segment following last match */
Fredrik Lundhdac58492001-10-21 21:48:30 +00001110 if (i < state.endpos) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001111 item = getslice(state.isbytes, state.beginning,
Serhiy Storchaka25324972013-10-16 12:46:28 +03001112 string, i, state.endpos);
Fredrik Lundhdac58492001-10-21 21:48:30 +00001113 if (!item)
1114 goto error;
1115 status = PyList_Append(list, item);
1116 Py_DECREF(item);
1117 if (status < 0)
1118 goto error;
1119 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001120
1121 state_fini(&state);
1122
Guido van Rossum4e173842001-12-07 04:25:10 +00001123 Py_DECREF(filter);
1124
Fredrik Lundhdac58492001-10-21 21:48:30 +00001125 /* convert list to single string (also removes list) */
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001126 joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001127 if (!joiner) {
1128 Py_DECREF(list);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001129 return NULL;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001130 }
1131 if (PyList_GET_SIZE(list) == 0) {
1132 Py_DECREF(list);
1133 item = joiner;
1134 }
1135 else {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001136 if (state.isbytes)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001137 item = _PyBytes_Join(joiner, list);
1138 else
1139 item = PyUnicode_Join(joiner, list);
1140 Py_DECREF(joiner);
Brett Cannonbaced562013-10-18 14:03:16 -04001141 Py_DECREF(list);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001142 if (!item)
1143 return NULL;
1144 }
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001145
1146 if (subn)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01001147 return Py_BuildValue("Nn", item, n);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001148
1149 return item;
1150
1151error:
1152 Py_DECREF(list);
1153 state_fini(&state);
Fredrik Lundh82b23072001-12-09 16:13:15 +00001154 Py_DECREF(filter);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001155 return NULL;
Tim Peters3d563502006-01-21 02:47:53 +00001156
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001157}
1158
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001159/*[clinic input]
1160_sre.SRE_Pattern.sub
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001161
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001162 repl: object
1163 string: object
1164 count: Py_ssize_t = 0
1165
1166Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1167[clinic start generated code]*/
1168
1169static PyObject *
1170_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1171 PyObject *string, Py_ssize_t count)
1172/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1173{
1174 return pattern_subx(self, repl, string, count, 0);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001175}
1176
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001177/*[clinic input]
1178_sre.SRE_Pattern.subn
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001179
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001180 repl: object
1181 string: object
1182 count: Py_ssize_t = 0
1183
1184Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1185[clinic start generated code]*/
1186
1187static PyObject *
1188_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1189 PyObject *string, Py_ssize_t count)
1190/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1191{
1192 return pattern_subx(self, repl, string, count, 1);
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001193}
Fredrik Lundhbec95b92001-10-21 16:47:57 +00001194
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001195/*[clinic input]
1196_sre.SRE_Pattern.__copy__
1197
1198[clinic start generated code]*/
1199
1200static PyObject *
1201_sre_SRE_Pattern___copy___impl(PatternObject *self)
1202/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001203{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001204 Py_INCREF(self);
1205 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001206}
1207
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001208/*[clinic input]
1209_sre.SRE_Pattern.__deepcopy__
1210
1211 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001212 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001213
1214[clinic start generated code]*/
1215
1216static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001217_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
1218/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001219{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03001220 Py_INCREF(self);
1221 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00001222}
1223
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001224static PyObject *
1225pattern_repr(PatternObject *obj)
1226{
1227 static const struct {
1228 const char *name;
1229 int value;
1230 } flag_names[] = {
1231 {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1232 {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1233 {"re.LOCALE", SRE_FLAG_LOCALE},
1234 {"re.MULTILINE", SRE_FLAG_MULTILINE},
1235 {"re.DOTALL", SRE_FLAG_DOTALL},
1236 {"re.UNICODE", SRE_FLAG_UNICODE},
1237 {"re.VERBOSE", SRE_FLAG_VERBOSE},
1238 {"re.DEBUG", SRE_FLAG_DEBUG},
1239 {"re.ASCII", SRE_FLAG_ASCII},
1240 };
1241 PyObject *result = NULL;
1242 PyObject *flag_items;
Victor Stinner706768c2014-08-16 01:03:39 +02001243 size_t i;
Serhiy Storchaka5c24d0e2013-11-23 22:42:43 +02001244 int flags = obj->flags;
1245
1246 /* Omit re.UNICODE for valid string patterns. */
1247 if (obj->isbytes == 0 &&
1248 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1249 SRE_FLAG_UNICODE)
1250 flags &= ~SRE_FLAG_UNICODE;
1251
1252 flag_items = PyList_New(0);
1253 if (!flag_items)
1254 return NULL;
1255
1256 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1257 if (flags & flag_names[i].value) {
1258 PyObject *item = PyUnicode_FromString(flag_names[i].name);
1259 if (!item)
1260 goto done;
1261
1262 if (PyList_Append(flag_items, item) < 0) {
1263 Py_DECREF(item);
1264 goto done;
1265 }
1266 Py_DECREF(item);
1267 flags &= ~flag_names[i].value;
1268 }
1269 }
1270 if (flags) {
1271 PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1272 if (!item)
1273 goto done;
1274
1275 if (PyList_Append(flag_items, item) < 0) {
1276 Py_DECREF(item);
1277 goto done;
1278 }
1279 Py_DECREF(item);
1280 }
1281
1282 if (PyList_Size(flag_items) > 0) {
1283 PyObject *flags_result;
1284 PyObject *sep = PyUnicode_FromString("|");
1285 if (!sep)
1286 goto done;
1287 flags_result = PyUnicode_Join(sep, flag_items);
1288 Py_DECREF(sep);
1289 if (!flags_result)
1290 goto done;
1291 result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1292 obj->pattern, flags_result);
1293 Py_DECREF(flags_result);
1294 }
1295 else {
1296 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1297 }
1298
1299done:
1300 Py_DECREF(flag_items);
1301 return result;
1302}
1303
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03001304PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
Raymond Hettinger94478742004-09-24 04:31:19 +00001305
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001306/* PatternObject's 'groupindex' method. */
1307static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02001308pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored))
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001309{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001310 if (self->groupindex == NULL)
1311 return PyDict_New();
Serhiy Storchaka07360df2015-03-30 01:01:48 +03001312 return PyDictProxy_New(self->groupindex);
1313}
1314
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001315static int _validate(PatternObject *self); /* Forward */
1316
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001317/*[clinic input]
1318_sre.compile
1319
1320 pattern: object
1321 flags: int
1322 code: object(subclass_of='&PyList_Type')
1323 groups: Py_ssize_t
Victor Stinner726a57d2016-11-22 23:04:39 +01001324 groupindex: object(subclass_of='&PyDict_Type')
1325 indexgroup: object(subclass_of='&PyTuple_Type')
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001326
1327[clinic start generated code]*/
1328
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001329static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001330_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001331 PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1332 PyObject *indexgroup)
Victor Stinner726a57d2016-11-22 23:04:39 +01001333/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001334{
1335 /* "compile" pattern descriptor to pattern object */
1336
1337 PatternObject* self;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001338 Py_ssize_t i, n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001339
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001340 n = PyList_GET_SIZE(code);
Christian Heimes587c2bf2008-01-19 16:21:02 +00001341 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001342 self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1343 if (!self)
1344 return NULL;
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001345 self->weakreflist = NULL;
1346 self->pattern = NULL;
1347 self->groupindex = NULL;
1348 self->indexgroup = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001349
1350 self->codesize = n;
1351
1352 for (i = 0; i < n; i++) {
1353 PyObject *o = PyList_GET_ITEM(code, i);
Guido van Rossumddefaf32007-01-14 03:31:43 +00001354 unsigned long value = PyLong_AsUnsignedLong(o);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001355 self->code[i] = (SRE_CODE) value;
1356 if ((unsigned long) self->code[i] != value) {
1357 PyErr_SetString(PyExc_OverflowError,
1358 "regular expression code size limit exceeded");
1359 break;
1360 }
1361 }
1362
1363 if (PyErr_Occurred()) {
Antoine Pitrou82feb1f2010-01-14 17:34:48 +00001364 Py_DECREF(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001365 return NULL;
1366 }
1367
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001368 if (pattern == Py_None) {
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001369 self->isbytes = -1;
Victor Stinner63ab8752011-11-22 03:31:20 +01001370 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001371 else {
1372 Py_ssize_t p_length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001373 int charsize;
1374 Py_buffer view;
1375 view.buf = NULL;
1376 if (!getstring(pattern, &p_length, &self->isbytes,
1377 &charsize, &view)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001378 Py_DECREF(self);
1379 return NULL;
1380 }
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001381 if (view.buf)
1382 PyBuffer_Release(&view);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383 }
Antoine Pitroufd036452008-08-19 17:56:33 +00001384
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001385 Py_INCREF(pattern);
1386 self->pattern = pattern;
1387
1388 self->flags = flags;
1389
1390 self->groups = groups;
1391
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001392 if (PyDict_GET_SIZE(groupindex) > 0) {
1393 Py_INCREF(groupindex);
1394 self->groupindex = groupindex;
1395 if (PyTuple_GET_SIZE(indexgroup) > 0) {
1396 Py_INCREF(indexgroup);
1397 self->indexgroup = indexgroup;
1398 }
1399 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001400
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001401 if (!_validate(self)) {
1402 Py_DECREF(self);
1403 return NULL;
1404 }
1405
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001406 return (PyObject*) self;
1407}
1408
Guido van Rossumb700df92000-03-31 14:59:30 +00001409/* -------------------------------------------------------------------- */
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001410/* Code validation */
1411
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.

   The nice thing about the generated code is that it is position-independent:
   all jumps are relative jumps forward.  Also, jumps don't cross each other:
   the target of a later jump is always earlier than the target of an earlier
   jump.  IOW, this is okay:

   J---------J-------T--------T
    \         \_____/        /
     \______________________/

   but this is not:

   J---------J-------T--------T
    \_________\_____/        /
               \____________/

   It also helps that SRE_CODE is always an unsigned type.
*/
1433
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  Call it with doubled parentheses,
   VTRACE((fmt, ...)): the inner list is handed to printf as-is when
   VVERBOSE is defined, and the whole call is an empty statement otherwise. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure: makes the enclosing function return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros read and advance the local pointer `code`, compare it with
   the local pointer `end`, and store into the local `op`, `arg` or `skip`
   respectively.  Each of them FAILs (returns 0 from the enclosing function)
   when `code` has already reached `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Read a skip count and FAIL unless skip-adj words, counted from the skip
   word itself, fit before `end`.  The subtraction is done on the unsigned
   skip value, so a skip smaller than adj wraps to a huge number and is
   rejected too.  `adj` is parenthesized so that an expression argument
   expands as intended. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-(adj) > (uintptr_t)(end - code))       \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001474
1475static int
1476_validate_charset(SRE_CODE *code, SRE_CODE *end)
1477{
1478 /* Some variables are manipulated by the macros above */
1479 SRE_CODE op;
1480 SRE_CODE arg;
1481 SRE_CODE offset;
1482 int i;
1483
1484 while (code < end) {
1485 GET_OP;
1486 switch (op) {
1487
1488 case SRE_OP_NEGATE:
1489 break;
1490
1491 case SRE_OP_LITERAL:
1492 GET_ARG;
1493 break;
1494
1495 case SRE_OP_RANGE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001496 case SRE_OP_RANGE_UNI_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001497 GET_ARG;
1498 GET_ARG;
1499 break;
1500
1501 case SRE_OP_CHARSET:
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001502 offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
Benjamin Petersonca470632016-09-06 13:47:26 -07001503 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001504 FAIL;
1505 code += offset;
1506 break;
1507
1508 case SRE_OP_BIGCHARSET:
1509 GET_ARG; /* Number of blocks */
1510 offset = 256/sizeof(SRE_CODE); /* 256-byte table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001511 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001512 FAIL;
1513 /* Make sure that each byte points to a valid block */
1514 for (i = 0; i < 256; i++) {
1515 if (((unsigned char *)code)[i] >= arg)
1516 FAIL;
1517 }
1518 code += offset;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001519 offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
Benjamin Petersonca470632016-09-06 13:47:26 -07001520 if (offset > (uintptr_t)(end - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001521 FAIL;
1522 code += offset;
1523 break;
1524
1525 case SRE_OP_CATEGORY:
1526 GET_ARG;
1527 switch (arg) {
1528 case SRE_CATEGORY_DIGIT:
1529 case SRE_CATEGORY_NOT_DIGIT:
1530 case SRE_CATEGORY_SPACE:
1531 case SRE_CATEGORY_NOT_SPACE:
1532 case SRE_CATEGORY_WORD:
1533 case SRE_CATEGORY_NOT_WORD:
1534 case SRE_CATEGORY_LINEBREAK:
1535 case SRE_CATEGORY_NOT_LINEBREAK:
1536 case SRE_CATEGORY_LOC_WORD:
1537 case SRE_CATEGORY_LOC_NOT_WORD:
1538 case SRE_CATEGORY_UNI_DIGIT:
1539 case SRE_CATEGORY_UNI_NOT_DIGIT:
1540 case SRE_CATEGORY_UNI_SPACE:
1541 case SRE_CATEGORY_UNI_NOT_SPACE:
1542 case SRE_CATEGORY_UNI_WORD:
1543 case SRE_CATEGORY_UNI_NOT_WORD:
1544 case SRE_CATEGORY_UNI_LINEBREAK:
1545 case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1546 break;
1547 default:
1548 FAIL;
1549 }
1550 break;
1551
1552 default:
1553 FAIL;
1554
1555 }
1556 }
1557
1558 return 1;
1559}
1560
1561static int
1562_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1563{
1564 /* Some variables are manipulated by the macros above */
1565 SRE_CODE op;
1566 SRE_CODE arg;
1567 SRE_CODE skip;
1568
1569 VTRACE(("code=%p, end=%p\n", code, end));
1570
1571 if (code > end)
1572 FAIL;
1573
1574 while (code < end) {
1575 GET_OP;
1576 switch (op) {
1577
1578 case SRE_OP_MARK:
1579 /* We don't check whether marks are properly nested; the
1580 sre_match() code is robust even if they don't, and the worst
1581 you can get is nonsensical match results. */
1582 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001583 if (arg > 2 * (size_t)groups + 1) {
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001584 VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
1585 FAIL;
1586 }
1587 break;
1588
1589 case SRE_OP_LITERAL:
1590 case SRE_OP_NOT_LITERAL:
1591 case SRE_OP_LITERAL_IGNORE:
1592 case SRE_OP_NOT_LITERAL_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001593 case SRE_OP_LITERAL_UNI_IGNORE:
1594 case SRE_OP_NOT_LITERAL_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001595 case SRE_OP_LITERAL_LOC_IGNORE:
1596 case SRE_OP_NOT_LITERAL_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001597 GET_ARG;
1598 /* The arg is just a character, nothing to check */
1599 break;
1600
1601 case SRE_OP_SUCCESS:
1602 case SRE_OP_FAILURE:
1603 /* Nothing to check; these normally end the matching process */
1604 break;
1605
1606 case SRE_OP_AT:
1607 GET_ARG;
1608 switch (arg) {
1609 case SRE_AT_BEGINNING:
1610 case SRE_AT_BEGINNING_STRING:
1611 case SRE_AT_BEGINNING_LINE:
1612 case SRE_AT_END:
1613 case SRE_AT_END_LINE:
1614 case SRE_AT_END_STRING:
1615 case SRE_AT_BOUNDARY:
1616 case SRE_AT_NON_BOUNDARY:
1617 case SRE_AT_LOC_BOUNDARY:
1618 case SRE_AT_LOC_NON_BOUNDARY:
1619 case SRE_AT_UNI_BOUNDARY:
1620 case SRE_AT_UNI_NON_BOUNDARY:
1621 break;
1622 default:
1623 FAIL;
1624 }
1625 break;
1626
1627 case SRE_OP_ANY:
1628 case SRE_OP_ANY_ALL:
1629 /* These have no operands */
1630 break;
1631
1632 case SRE_OP_IN:
1633 case SRE_OP_IN_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001634 case SRE_OP_IN_UNI_IGNORE:
Serhiy Storchaka898ff032017-05-05 08:53:40 +03001635 case SRE_OP_IN_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001636 GET_SKIP;
1637 /* Stop 1 before the end; we check the FAILURE below */
1638 if (!_validate_charset(code, code+skip-2))
1639 FAIL;
1640 if (code[skip-2] != SRE_OP_FAILURE)
1641 FAIL;
1642 code += skip-1;
1643 break;
1644
1645 case SRE_OP_INFO:
1646 {
1647 /* A minimal info field is
1648 <INFO> <1=skip> <2=flags> <3=min> <4=max>;
1649 If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
1650 more follows. */
Ross Lagerwall88748d72012-03-06 21:48:57 +02001651 SRE_CODE flags, i;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001652 SRE_CODE *newcode;
1653 GET_SKIP;
1654 newcode = code+skip-1;
1655 GET_ARG; flags = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001656 GET_ARG;
1657 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001658 /* Check that only valid flags are present */
1659 if ((flags & ~(SRE_INFO_PREFIX |
1660 SRE_INFO_LITERAL |
1661 SRE_INFO_CHARSET)) != 0)
1662 FAIL;
1663 /* PREFIX and CHARSET are mutually exclusive */
1664 if ((flags & SRE_INFO_PREFIX) &&
1665 (flags & SRE_INFO_CHARSET))
1666 FAIL;
1667 /* LITERAL implies PREFIX */
1668 if ((flags & SRE_INFO_LITERAL) &&
1669 !(flags & SRE_INFO_PREFIX))
1670 FAIL;
1671 /* Validate the prefix */
1672 if (flags & SRE_INFO_PREFIX) {
Ross Lagerwall88748d72012-03-06 21:48:57 +02001673 SRE_CODE prefix_len;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001674 GET_ARG; prefix_len = arg;
Ross Lagerwall88748d72012-03-06 21:48:57 +02001675 GET_ARG;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001676 /* Here comes the prefix string */
Benjamin Petersonca470632016-09-06 13:47:26 -07001677 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001678 FAIL;
1679 code += prefix_len;
1680 /* And here comes the overlap table */
Benjamin Petersonca470632016-09-06 13:47:26 -07001681 if (prefix_len > (uintptr_t)(newcode - code))
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001682 FAIL;
1683 /* Each overlap value should be < prefix_len */
1684 for (i = 0; i < prefix_len; i++) {
1685 if (code[i] >= prefix_len)
1686 FAIL;
1687 }
1688 code += prefix_len;
1689 }
1690 /* Validate the charset */
1691 if (flags & SRE_INFO_CHARSET) {
1692 if (!_validate_charset(code, newcode-1))
1693 FAIL;
1694 if (newcode[-1] != SRE_OP_FAILURE)
1695 FAIL;
1696 code = newcode;
1697 }
1698 else if (code != newcode) {
1699 VTRACE(("code=%p, newcode=%p\n", code, newcode));
1700 FAIL;
1701 }
1702 }
1703 break;
1704
1705 case SRE_OP_BRANCH:
1706 {
1707 SRE_CODE *target = NULL;
1708 for (;;) {
1709 GET_SKIP;
1710 if (skip == 0)
1711 break;
1712 /* Stop 2 before the end; we check the JUMP below */
1713 if (!_validate_inner(code, code+skip-3, groups))
1714 FAIL;
1715 code += skip-3;
1716 /* Check that it ends with a JUMP, and that each JUMP
1717 has the same target */
1718 GET_OP;
1719 if (op != SRE_OP_JUMP)
1720 FAIL;
1721 GET_SKIP;
1722 if (target == NULL)
1723 target = code+skip-1;
1724 else if (code+skip-1 != target)
1725 FAIL;
1726 }
1727 }
1728 break;
1729
1730 case SRE_OP_REPEAT_ONE:
1731 case SRE_OP_MIN_REPEAT_ONE:
1732 {
1733 SRE_CODE min, max;
1734 GET_SKIP;
1735 GET_ARG; min = arg;
1736 GET_ARG; max = arg;
1737 if (min > max)
1738 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001739 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001740 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001741 if (!_validate_inner(code, code+skip-4, groups))
1742 FAIL;
1743 code += skip-4;
1744 GET_OP;
1745 if (op != SRE_OP_SUCCESS)
1746 FAIL;
1747 }
1748 break;
1749
1750 case SRE_OP_REPEAT:
1751 {
1752 SRE_CODE min, max;
1753 GET_SKIP;
1754 GET_ARG; min = arg;
1755 GET_ARG; max = arg;
1756 if (min > max)
1757 FAIL;
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02001758 if (max > SRE_MAXREPEAT)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001759 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001760 if (!_validate_inner(code, code+skip-3, groups))
1761 FAIL;
1762 code += skip-3;
1763 GET_OP;
1764 if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
1765 FAIL;
1766 }
1767 break;
1768
1769 case SRE_OP_GROUPREF:
1770 case SRE_OP_GROUPREF_IGNORE:
Serhiy Storchaka3557b052017-10-24 23:31:42 +03001771 case SRE_OP_GROUPREF_UNI_IGNORE:
1772 case SRE_OP_GROUPREF_LOC_IGNORE:
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001773 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001774 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001775 FAIL;
1776 break;
1777
1778 case SRE_OP_GROUPREF_EXISTS:
1779 /* The regex syntax for this is: '(?(group)then|else)', where
1780 'group' is either an integer group number or a group name,
1781 'then' and 'else' are sub-regexes, and 'else' is optional. */
1782 GET_ARG;
Victor Stinner1fa174a2013-08-28 02:06:21 +02001783 if (arg >= (size_t)groups)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001784 FAIL;
Guido van Rossum92f8f3e2008-09-10 14:30:50 +00001785 GET_SKIP_ADJ(1);
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001786 code--; /* The skip is relative to the first arg! */
1787 /* There are two possibilities here: if there is both a 'then'
1788 part and an 'else' part, the generated code looks like:
1789
1790 GROUPREF_EXISTS
1791 <group>
1792 <skipyes>
1793 ...then part...
1794 JUMP
1795 <skipno>
1796 (<skipyes> jumps here)
1797 ...else part...
1798 (<skipno> jumps here)
1799
1800 If there is only a 'then' part, it looks like:
1801
1802 GROUPREF_EXISTS
1803 <group>
1804 <skip>
1805 ...then part...
1806 (<skip> jumps here)
1807
1808 There is no direct way to decide which it is, and we don't want
1809 to allow arbitrary jumps anywhere in the code; so we just look
1810 for a JUMP opcode preceding our skip target.
1811 */
Benjamin Petersonca470632016-09-06 13:47:26 -07001812 if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001813 code[skip-3] == SRE_OP_JUMP)
1814 {
1815 VTRACE(("both then and else parts present\n"));
1816 if (!_validate_inner(code+1, code+skip-3, groups))
1817 FAIL;
1818 code += skip-2; /* Position after JUMP, at <skipno> */
1819 GET_SKIP;
1820 if (!_validate_inner(code, code+skip-1, groups))
1821 FAIL;
1822 code += skip-1;
1823 }
1824 else {
1825 VTRACE(("only a then part present\n"));
1826 if (!_validate_inner(code+1, code+skip-1, groups))
1827 FAIL;
1828 code += skip-1;
1829 }
1830 break;
1831
1832 case SRE_OP_ASSERT:
1833 case SRE_OP_ASSERT_NOT:
1834 GET_SKIP;
1835 GET_ARG; /* 0 for lookahead, width for lookbehind */
1836 code--; /* Back up over arg to simplify math below */
1837 if (arg & 0x80000000)
1838 FAIL; /* Width too large */
1839 /* Stop 1 before the end; we check the SUCCESS below */
1840 if (!_validate_inner(code+1, code+skip-2, groups))
1841 FAIL;
1842 code += skip-2;
1843 GET_OP;
1844 if (op != SRE_OP_SUCCESS)
1845 FAIL;
1846 break;
1847
1848 default:
1849 FAIL;
1850
1851 }
1852 }
1853
1854 VTRACE(("okay\n"));
1855 return 1;
1856}
1857
1858static int
1859_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1860{
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03001861 if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1862 code >= end || end[-1] != SRE_OP_SUCCESS)
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001863 FAIL;
Guido van Rossum10faf6a2008-08-06 19:29:14 +00001864 return _validate_inner(code, end-1, groups);
1865}
1866
1867static int
1868_validate(PatternObject *self)
1869{
1870 if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1871 {
1872 PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1873 return 0;
1874 }
1875 else
1876 VTRACE(("Success!\n"));
1877 return 1;
1878}
1879
1880/* -------------------------------------------------------------------- */
Guido van Rossumb700df92000-03-31 14:59:30 +00001881/* match methods */
1882
1883static void
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001884match_dealloc(MatchObject* self)
Guido van Rossumb700df92000-03-31 14:59:30 +00001885{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001886 Py_XDECREF(self->regs);
1887 Py_XDECREF(self->string);
1888 Py_DECREF(self->pattern);
1889 PyObject_DEL(self);
Guido van Rossumb700df92000-03-31 14:59:30 +00001890}
1891
1892static PyObject*
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001893match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
Guido van Rossumb700df92000-03-31 14:59:30 +00001894{
Serhiy Storchaka25324972013-10-16 12:46:28 +03001895 Py_ssize_t length;
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001896 int isbytes, charsize;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001897 Py_buffer view;
1898 PyObject *result;
1899 void* ptr;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001900 Py_ssize_t i, j;
Serhiy Storchaka25324972013-10-16 12:46:28 +03001901
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001902 assert(0 <= index && index < self->groups);
Fredrik Lundh6f013982000-07-03 18:44:21 +00001903 index *= 2;
1904
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001905 if (self->string == Py_None || self->mark[index] < 0) {
1906 /* return default value if the string or group is undefined */
1907 Py_INCREF(def);
1908 return def;
1909 }
Guido van Rossumb700df92000-03-31 14:59:30 +00001910
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001911 ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
Serhiy Storchaka25324972013-10-16 12:46:28 +03001912 if (ptr == NULL)
1913 return NULL;
Serhiy Storchaka7e10dbb2017-02-04 22:53:57 +02001914
1915 i = self->mark[index];
1916 j = self->mark[index+1];
1917 i = Py_MIN(i, length);
1918 j = Py_MIN(j, length);
1919 result = getslice(isbytes, ptr, self->string, i, j);
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03001920 if (isbytes && view.buf != NULL)
Serhiy Storchaka25324972013-10-16 12:46:28 +03001921 PyBuffer_Release(&view);
1922 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00001923}
1924
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001925static Py_ssize_t
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001926match_getindex(MatchObject* self, PyObject* index)
Guido van Rossumb700df92000-03-31 14:59:30 +00001927{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001928 Py_ssize_t i;
Guido van Rossumb700df92000-03-31 14:59:30 +00001929
Guido van Rossumddefaf32007-01-14 03:31:43 +00001930 if (index == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03001931 /* Default value */
1932 return 0;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001933
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001934 if (PyIndex_Check(index)) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001935 i = PyNumber_AsSsize_t(index, NULL);
Serhiy Storchaka977b3ac2016-06-18 16:48:07 +03001936 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001937 else {
1938 i = -1;
Guido van Rossumb700df92000-03-31 14:59:30 +00001939
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001940 if (self->pattern->groupindex) {
1941 index = PyDict_GetItemWithError(self->pattern->groupindex, index);
1942 if (index && PyLong_Check(index)) {
1943 i = PyLong_AsSsize_t(index);
1944 }
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03001945 }
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001946 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001947 if (i < 0 || i >= self->groups) {
1948 /* raise IndexError if we were given a bad group number */
1949 if (!PyErr_Occurred()) {
1950 PyErr_SetString(PyExc_IndexError, "no such group");
1951 }
1952 return -1;
1953 }
Fredrik Lundh6f013982000-07-03 18:44:21 +00001954
1955 return i;
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001956}
1957
1958static PyObject*
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00001959match_getslice(MatchObject* self, PyObject* index, PyObject* def)
Fredrik Lundh436c3d582000-06-29 08:58:44 +00001960{
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001961 Py_ssize_t i = match_getindex(self, index);
1962
1963 if (i < 0) {
1964 return NULL;
1965 }
1966
1967 return match_getslice_by_index(self, i, def);
Guido van Rossumb700df92000-03-31 14:59:30 +00001968}
1969
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001970/*[clinic input]
1971_sre.SRE_Match.expand
1972
1973 template: object
1974
1975Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
1976[clinic start generated code]*/
1977
1978static PyObject *
1979_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
1980/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001981{
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001982 /* delegate to Python code */
1983 return call(
Thomas Wouters9ada3d62006-04-21 09:47:09 +00001984 SRE_PY_MODULE, "_expand",
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03001985 PyTuple_Pack(3, self->pattern, self, template)
Fredrik Lundh5644b7f2000-09-21 17:03:25 +00001986 );
1987}
1988
1989static PyObject*
Fredrik Lundh75f2d672000-06-29 11:34:28 +00001990match_group(MatchObject* self, PyObject* args)
Guido van Rossumb700df92000-03-31 14:59:30 +00001991{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001992 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001993 Py_ssize_t i, size;
Guido van Rossumb700df92000-03-31 14:59:30 +00001994
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001995 size = PyTuple_GET_SIZE(args);
Guido van Rossumb700df92000-03-31 14:59:30 +00001996
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00001997 switch (size) {
1998 case 0:
Serhiy Storchakaba85d692017-03-30 09:09:41 +03001999 result = match_getslice(self, _PyLong_Zero, Py_None);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002000 break;
2001 case 1:
2002 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2003 break;
2004 default:
2005 /* fetch multiple items */
2006 result = PyTuple_New(size);
2007 if (!result)
2008 return NULL;
2009 for (i = 0; i < size; i++) {
2010 PyObject* item = match_getslice(
Fredrik Lundhdf02d0b2000-06-30 07:08:20 +00002011 self, PyTuple_GET_ITEM(args, i), Py_None
2012 );
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002013 if (!item) {
2014 Py_DECREF(result);
2015 return NULL;
2016 }
2017 PyTuple_SET_ITEM(result, i, item);
2018 }
2019 break;
2020 }
2021 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002022}
2023
Eric V. Smith605bdae2016-09-11 08:55:43 -04002024static PyObject*
2025match_getitem(MatchObject* self, PyObject* name)
2026{
2027 return match_getslice(self, name, Py_None);
2028}
2029
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002030/*[clinic input]
2031_sre.SRE_Match.groups
2032
2033 default: object = None
2034 Is used for groups that did not participate in the match.
2035
2036Return a tuple containing all the subgroups of the match, from 1.
2037[clinic start generated code]*/
2038
2039static PyObject *
2040_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2041/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002042{
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002043 PyObject* result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002044 Py_ssize_t index;
Guido van Rossumb700df92000-03-31 14:59:30 +00002045
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002046 result = PyTuple_New(self->groups-1);
2047 if (!result)
2048 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002049
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002050 for (index = 1; index < self->groups; index++) {
2051 PyObject* item;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002052 item = match_getslice_by_index(self, index, default_value);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002053 if (!item) {
2054 Py_DECREF(result);
2055 return NULL;
2056 }
2057 PyTuple_SET_ITEM(result, index-1, item);
2058 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002059
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002060 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002061}
2062
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002063/*[clinic input]
2064_sre.SRE_Match.groupdict
2065
2066 default: object = None
2067 Is used for groups that did not participate in the match.
2068
2069Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2070[clinic start generated code]*/
2071
2072static PyObject *
2073_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2074/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002075{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002076 PyObject *result;
2077 PyObject *key;
2078 PyObject *value;
2079 Py_ssize_t pos = 0;
2080 Py_hash_t hash;
Guido van Rossumb700df92000-03-31 14:59:30 +00002081
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002082 result = PyDict_New();
2083 if (!result || !self->pattern->groupindex)
2084 return result;
Guido van Rossumb700df92000-03-31 14:59:30 +00002085
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002086 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) {
Fredrik Lundh770617b2001-01-14 15:06:11 +00002087 int status;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002088 Py_INCREF(key);
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002089 value = match_getslice(self, key, default_value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002090 if (!value) {
2091 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002092 goto failed;
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002093 }
2094 status = _PyDict_SetItem_KnownHash(result, key, value, hash);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002095 Py_DECREF(value);
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002096 Py_DECREF(key);
Fredrik Lundh770617b2001-01-14 15:06:11 +00002097 if (status < 0)
2098 goto failed;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002099 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002100
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002101 return result;
Fredrik Lundh770617b2001-01-14 15:06:11 +00002102
2103failed:
Fredrik Lundh770617b2001-01-14 15:06:11 +00002104 Py_DECREF(result);
2105 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002106}
2107
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002108/*[clinic input]
2109_sre.SRE_Match.start -> Py_ssize_t
2110
2111 group: object(c_default="NULL") = 0
2112 /
2113
2114Return index of the start of the substring matched by group.
2115[clinic start generated code]*/
2116
2117static Py_ssize_t
2118_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2119/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002120{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002121 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002122
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002123 if (index < 0) {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002124 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002125 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002126
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002127 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002128 return self->mark[index*2];
Guido van Rossumb700df92000-03-31 14:59:30 +00002129}
2130
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002131/*[clinic input]
2132_sre.SRE_Match.end -> Py_ssize_t
2133
2134 group: object(c_default="NULL") = 0
2135 /
2136
2137Return index of the end of the substring matched by group.
2138[clinic start generated code]*/
2139
2140static Py_ssize_t
2141_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2142/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002143{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002144 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002145
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002146 if (index < 0) {
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002147 return -1;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002148 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002149
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002150 /* mark is -1 if group is undefined */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002151 return self->mark[index*2+1];
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002152}
2153
2154LOCAL(PyObject*)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002155_pair(Py_ssize_t i1, Py_ssize_t i2)
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002156{
2157 PyObject* pair;
2158 PyObject* item;
2159
2160 pair = PyTuple_New(2);
2161 if (!pair)
2162 return NULL;
2163
Christian Heimes217cfd12007-12-02 14:31:20 +00002164 item = PyLong_FromSsize_t(i1);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002165 if (!item)
2166 goto error;
2167 PyTuple_SET_ITEM(pair, 0, item);
2168
Christian Heimes217cfd12007-12-02 14:31:20 +00002169 item = PyLong_FromSsize_t(i2);
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002170 if (!item)
2171 goto error;
2172 PyTuple_SET_ITEM(pair, 1, item);
2173
2174 return pair;
2175
2176 error:
2177 Py_DECREF(pair);
2178 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002179}
2180
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002181/*[clinic input]
2182_sre.SRE_Match.span
2183
2184 group: object(c_default="NULL") = 0
2185 /
2186
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002187For match object m, return the 2-tuple (m.start(group), m.end(group)).
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002188[clinic start generated code]*/
2189
2190static PyObject *
2191_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002192/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
Guido van Rossumb700df92000-03-31 14:59:30 +00002193{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002194 Py_ssize_t index = match_getindex(self, group);
Fredrik Lundh436c3d582000-06-29 08:58:44 +00002195
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02002196 if (index < 0) {
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002197 return NULL;
2198 }
Guido van Rossumb700df92000-03-31 14:59:30 +00002199
Fredrik Lundh510c97b2000-09-02 16:36:57 +00002200 /* marks are -1 if group is undefined */
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002201 return _pair(self->mark[index*2], self->mark[index*2+1]);
2202}
2203
2204static PyObject*
2205match_regs(MatchObject* self)
2206{
2207 PyObject* regs;
2208 PyObject* item;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002209 Py_ssize_t index;
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002210
2211 regs = PyTuple_New(self->groups);
2212 if (!regs)
2213 return NULL;
2214
2215 for (index = 0; index < self->groups; index++) {
2216 item = _pair(self->mark[index*2], self->mark[index*2+1]);
2217 if (!item) {
2218 Py_DECREF(regs);
2219 return NULL;
2220 }
2221 PyTuple_SET_ITEM(regs, index, item);
2222 }
2223
2224 Py_INCREF(regs);
2225 self->regs = regs;
2226
2227 return regs;
Guido van Rossumb700df92000-03-31 14:59:30 +00002228}
2229
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002230/*[clinic input]
2231_sre.SRE_Match.__copy__
2232
2233[clinic start generated code]*/
2234
2235static PyObject *
2236_sre_SRE_Match___copy___impl(MatchObject *self)
2237/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002238{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002239 Py_INCREF(self);
2240 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002241}
2242
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002243/*[clinic input]
2244_sre.SRE_Match.__deepcopy__
2245
2246 memo: object
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002247 /
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002248
2249[clinic start generated code]*/
2250
2251static PyObject *
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002252_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo)
2253/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002254{
Serhiy Storchakafdbd0112017-04-16 10:16:03 +03002255 Py_INCREF(self);
2256 return (PyObject *)self;
Fredrik Lundhb0f05bd2001-07-02 16:42:49 +00002257}
2258
/* Docstring of the match type; referenced from the tp_doc slot of
   Match_Type. */
PyDoc_STRVAR(match_doc,
"The result of re.match() and re.search().\n\
Match objects always have a boolean value of True.");
2262
/* Docstring of the "group" method; referenced from the "group" entry of
   match_methods. */
PyDoc_STRVAR(match_group_doc,
"group([group1, ...]) -> str or tuple.\n\
 Return subgroup(s) of the match by indices or names.\n\
 For 0 returns the entire match.");
2267
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002268static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002269match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored))
Guido van Rossumb700df92000-03-31 14:59:30 +00002270{
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002271 if (self->lastindex >= 0)
Antoine Pitrou43fb54c2012-12-02 12:52:36 +01002272 return PyLong_FromSsize_t(self->lastindex);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002273 Py_RETURN_NONE;
Guido van Rossumb700df92000-03-31 14:59:30 +00002274}
2275
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002276static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002277match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored))
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002278{
Serhiy Storchakacd85d0b2017-04-16 09:39:30 +03002279 if (self->pattern->indexgroup &&
2280 self->lastindex >= 0 &&
2281 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup))
2282 {
2283 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup,
2284 self->lastindex);
2285 Py_INCREF(result);
2286 return result;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002287 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002288 Py_RETURN_NONE;
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002289}
2290
2291static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02002292match_regs_get(MatchObject *self, void *Py_UNUSED(ignored))
Amaury Forgeot d'Arce43d33a2008-07-02 20:50:16 +00002293{
2294 if (self->regs) {
2295 Py_INCREF(self->regs);
2296 return self->regs;
2297 } else
2298 return match_regs(self);
2299}
2300
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002301static PyObject *
2302match_repr(MatchObject *self)
2303{
2304 PyObject *result;
2305 PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2306 if (group0 == NULL)
2307 return NULL;
2308 result = PyUnicode_FromFormat(
sth8b91eda2019-03-10 11:29:14 +01002309 "<%s object; span=(%zd, %zd), match=%.50R>",
Serhiy Storchaka36af10c2013-10-20 13:13:31 +03002310 Py_TYPE(self)->tp_name,
2311 self->mark[0], self->mark[1], group0);
2312 Py_DECREF(group0);
2313 return result;
2314}
2315
2316
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002317static PyObject*
Victor Stinnerf5587782013-11-15 23:21:11 +01002318pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002319{
2320 /* create match object (from state object) */
2321
2322 MatchObject* match;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002323 Py_ssize_t i, j;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002324 char* base;
2325 int n;
2326
2327 if (status > 0) {
2328
2329 /* create match object (with room for extra group marks) */
Christian Heimes587c2bf2008-01-19 16:21:02 +00002330 /* coverity[ampersand_in_size] */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002331 match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2332 2*(pattern->groups+1));
2333 if (!match)
2334 return NULL;
2335
2336 Py_INCREF(pattern);
2337 match->pattern = pattern;
2338
2339 Py_INCREF(state->string);
2340 match->string = state->string;
2341
2342 match->regs = NULL;
2343 match->groups = pattern->groups+1;
2344
2345 /* fill in group slices */
2346
2347 base = (char*) state->beginning;
2348 n = state->charsize;
2349
2350 match->mark[0] = ((char*) state->start - base) / n;
2351 match->mark[1] = ((char*) state->ptr - base) / n;
2352
2353 for (i = j = 0; i < pattern->groups; i++, j+=2)
2354 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2355 match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2356 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2357 } else
2358 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2359
2360 match->pos = state->pos;
2361 match->endpos = state->endpos;
2362
2363 match->lastindex = state->lastindex;
2364
2365 return (PyObject*) match;
2366
2367 } else if (status == 0) {
2368
2369 /* no match */
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02002370 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002371
2372 }
2373
2374 /* internal error */
2375 pattern_error(status);
2376 return NULL;
2377}
2378
2379
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002380/* -------------------------------------------------------------------- */
Fredrik Lundhbe2211e2000-06-29 16:57:40 +00002381/* scanner methods (experimental) */
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002382
/* Deallocator of scanner objects: finalizes the embedded search state,
   drops the reference to the pattern and frees the object itself. */
static void
scanner_dealloc(ScannerObject* self)
{
    state_fini(&self->state);
    /* XDECREF: pattern_scanner() leaves this field NULL when state_init()
       fails and then releases the half-built scanner */
    Py_XDECREF(self->pattern);
    PyObject_DEL(self);
}
2390
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002391/*[clinic input]
2392_sre.SRE_Scanner.match
2393
2394[clinic start generated code]*/
2395
2396static PyObject *
2397_sre_SRE_Scanner_match_impl(ScannerObject *self)
2398/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002399{
2400 SRE_STATE* state = &self->state;
2401 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002402 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002403
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002404 if (state->start == NULL)
2405 Py_RETURN_NONE;
2406
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002407 state_reset(state);
2408
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002409 state->ptr = state->start;
2410
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002411 status = sre_match(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002412 if (PyErr_Occurred())
2413 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002414
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002415 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002416 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002417
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002418 if (status == 0)
2419 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002420 else {
2421 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002422 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002423 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002424
2425 return match;
2426}
2427
2428
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002429/*[clinic input]
2430_sre.SRE_Scanner.search
2431
2432[clinic start generated code]*/
2433
2434static PyObject *
2435_sre_SRE_Scanner_search_impl(ScannerObject *self)
2436/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002437{
2438 SRE_STATE* state = &self->state;
2439 PyObject* match;
Victor Stinner7a6d7cf2012-10-31 00:37:41 +01002440 Py_ssize_t status;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002441
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002442 if (state->start == NULL)
2443 Py_RETURN_NONE;
2444
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00002445 state_reset(state);
2446
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002447 state->ptr = state->start;
2448
Serhiy Storchaka9eabac62013-10-26 10:45:48 +03002449 status = sre_search(state, PatternObject_GetCode(self->pattern));
Thomas Wouters89f507f2006-12-13 04:49:30 +00002450 if (PyErr_Occurred())
2451 return NULL;
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002452
Fredrik Lundh75f2d672000-06-29 11:34:28 +00002453 match = pattern_new_match((PatternObject*) self->pattern,
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +00002454 state, status);
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002455
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002456 if (status == 0)
2457 state->start = NULL;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002458 else {
2459 state->must_advance = (state->ptr == state->start);
Serhiy Storchaka03d6ee32015-07-06 13:58:33 +03002460 state->start = state->ptr;
Serhiy Storchaka70d56fb2017-12-04 14:29:05 +02002461 }
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +00002462
2463 return match;
2464}
2465
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002466static PyObject *
2467pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002468{
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002469 ScannerObject* scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002470
2471 /* create scanner object */
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002472 scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2473 if (!scanner)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002474 return NULL;
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002475 scanner->pattern = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002476
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002477 /* create search state object */
2478 if (!state_init(&scanner->state, self, string, pos, endpos)) {
2479 Py_DECREF(scanner);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002480 return NULL;
2481 }
2482
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002483 Py_INCREF(self);
2484 scanner->pattern = (PyObject*) self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002485
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002486 return (PyObject*) scanner;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002487}
2488
Victor Stinnerb44fb122016-11-21 16:35:08 +01002489static Py_hash_t
2490pattern_hash(PatternObject *self)
2491{
2492 Py_hash_t hash, hash2;
2493
2494 hash = PyObject_Hash(self->pattern);
2495 if (hash == -1) {
2496 return -1;
2497 }
2498
2499 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2500 hash ^= hash2;
2501
2502 hash ^= self->flags;
2503 hash ^= self->isbytes;
2504 hash ^= self->codesize;
2505
2506 if (hash == -1) {
2507 hash = -2;
2508 }
2509 return hash;
2510}
2511
2512static PyObject*
2513pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2514{
2515 PatternObject *left, *right;
2516 int cmp;
2517
2518 if (op != Py_EQ && op != Py_NE) {
2519 Py_RETURN_NOTIMPLEMENTED;
2520 }
2521
2522 if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2523 Py_RETURN_NOTIMPLEMENTED;
2524 }
Victor Stinnerbcf4dcc2016-11-22 15:30:38 +01002525
2526 if (lefto == righto) {
2527 /* a pattern is equal to itself */
2528 return PyBool_FromLong(op == Py_EQ);
2529 }
2530
Victor Stinnerb44fb122016-11-21 16:35:08 +01002531 left = (PatternObject *)lefto;
2532 right = (PatternObject *)righto;
2533
2534 cmp = (left->flags == right->flags
2535 && left->isbytes == right->isbytes
Victor Stinnere670b2d2016-11-22 15:23:00 +01002536 && left->codesize == right->codesize);
Victor Stinnerb44fb122016-11-21 16:35:08 +01002537 if (cmp) {
2538 /* Compare the code and the pattern because the same pattern can
2539 produce different codes depending on the locale used to compile the
2540 pattern when the re.LOCALE flag is used. Don't compare groups,
2541 indexgroup nor groupindex: they are derivated from the pattern. */
2542 cmp = (memcmp(left->code, right->code,
2543 sizeof(left->code[0]) * left->codesize) == 0);
2544 }
2545 if (cmp) {
2546 cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2547 Py_EQ);
2548 if (cmp < 0) {
2549 return NULL;
2550 }
2551 }
2552 if (op == Py_NE) {
2553 cmp = !cmp;
2554 }
2555 return PyBool_FromLong(cmp);
2556}
2557
Serhiy Storchakaa860aea2015-05-03 15:54:23 +03002558#include "clinic/_sre.c.h"
2559
/* Method table of the pattern type (installed as Pattern_Type.tp_methods).
   The entries are *_METHODDEF macros, presumably provided by the
   Argument Clinic header included just above; they are written without
   separating commas, so each macro has to expand to a complete entry
   including its trailing comma. */
static PyMethodDef pattern_methods[] = {
    _SRE_SRE_PATTERN_MATCH_METHODDEF
    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
    _SRE_SRE_PATTERN_SEARCH_METHODDEF
    _SRE_SRE_PATTERN_SUB_METHODDEF
    _SRE_SRE_PATTERN_SUBN_METHODDEF
    _SRE_SRE_PATTERN_FINDALL_METHODDEF
    _SRE_SRE_PATTERN_SPLIT_METHODDEF
    _SRE_SRE_PATTERN_FINDITER_METHODDEF
    _SRE_SRE_PATTERN_SCANNER_METHODDEF
    _SRE_SRE_PATTERN___COPY___METHODDEF
    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
    {NULL, NULL}  /* sentinel */
};
2574
/* Computed attributes of the pattern type (installed as
   Pattern_Type.tp_getset).  "groupindex" has a getter only, so it cannot
   be assigned. */
static PyGetSetDef pattern_getset[] = {
    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
      "A dictionary mapping group names to group numbers."},
    {NULL}  /* Sentinel */
};
2580
/* PAT_OFF(x): byte offset of field x inside PatternObject. */
#define PAT_OFF(x) offsetof(PatternObject, x)
/* Attributes of the pattern type that are read straight from
   PatternObject fields (installed as Pattern_Type.tp_members); all of
   them are read-only. */
static PyMemberDef pattern_members[] = {
    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY,
     "The pattern string from which the RE object was compiled."},
    {"flags",      T_INT,       PAT_OFF(flags),         READONLY,
     "The regex matching flags."},
    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY,
     "The number of capturing groups in the pattern."},
    {NULL}  /* Sentinel */
};
2591
2592static PyTypeObject Pattern_Type = {
2593 PyVarObject_HEAD_INIT(NULL, 0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002594 "re.Pattern",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002595 sizeof(PatternObject), sizeof(SRE_CODE),
2596 (destructor)pattern_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002597 0, /* tp_vectorcall_offset */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002598 0, /* tp_getattr */
2599 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002600 0, /* tp_as_async */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002601 (reprfunc)pattern_repr, /* tp_repr */
2602 0, /* tp_as_number */
2603 0, /* tp_as_sequence */
2604 0, /* tp_as_mapping */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002605 (hashfunc)pattern_hash, /* tp_hash */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002606 0, /* tp_call */
2607 0, /* tp_str */
2608 0, /* tp_getattro */
2609 0, /* tp_setattro */
2610 0, /* tp_as_buffer */
2611 Py_TPFLAGS_DEFAULT, /* tp_flags */
2612 pattern_doc, /* tp_doc */
2613 0, /* tp_traverse */
2614 0, /* tp_clear */
Victor Stinnerb44fb122016-11-21 16:35:08 +01002615 pattern_richcompare, /* tp_richcompare */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002616 offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2617 0, /* tp_iter */
2618 0, /* tp_iternext */
2619 pattern_methods, /* tp_methods */
2620 pattern_members, /* tp_members */
2621 pattern_getset, /* tp_getset */
2622};
2623
Eric V. Smith605bdae2016-09-11 08:55:43 -04002624/* Match objects do not support length or assignment, but do support
2625 __getitem__. */
2626static PyMappingMethods match_as_mapping = {
2627 NULL,
2628 (binaryfunc)match_getitem,
2629 NULL
2630};
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002631
/* Method table of the match type (installed as Match_Type.tp_methods).
   "group" is spelled out by hand as a METH_VARARGS method; the remaining
   entries are *_METHODDEF macros written without separating commas, so
   each macro has to expand to a complete entry including its trailing
   comma. */
static PyMethodDef match_methods[] = {
    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
    _SRE_SRE_MATCH_START_METHODDEF
    _SRE_SRE_MATCH_END_METHODDEF
    _SRE_SRE_MATCH_SPAN_METHODDEF
    _SRE_SRE_MATCH_GROUPS_METHODDEF
    _SRE_SRE_MATCH_GROUPDICT_METHODDEF
    _SRE_SRE_MATCH_EXPAND_METHODDEF
    _SRE_SRE_MATCH___COPY___METHODDEF
    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
    {NULL, NULL}  /* sentinel */
};
2644
/* Computed attributes of the match type (installed as
   Match_Type.tp_getset).  All three have a getter only; "regs" is listed
   without a docstring. */
static PyGetSetDef match_getset[] = {
    {"lastindex", (getter)match_lastindex_get, (setter)NULL,
     "The integer index of the last matched capturing group."},
    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
     "The name of the last matched capturing group."},
    {"regs",      (getter)match_regs_get,      (setter)NULL},
    {NULL}  /* sentinel */
};
2653
/* MATCH_OFF(x): byte offset of field x inside MatchObject. */
#define MATCH_OFF(x) offsetof(MatchObject, x)
/* Attributes of the match type that are read straight from MatchObject
   fields (installed as Match_Type.tp_members); all of them are read-only.
   Note that the Python-level name "re" maps to the C field "pattern". */
static PyMemberDef match_members[] = {
    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY,
     "The string passed to match() or search()."},
    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY,
     "The regular expression object."},
    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY,
     "The index into the string at which the RE engine started looking for a match."},
    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY,
     "The index into the string beyond which the RE engine will not go."},
    {NULL}  /* sentinel */
};
2666
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2669
2670static PyTypeObject Match_Type = {
2671 PyVarObject_HEAD_INIT(NULL,0)
Serhiy Storchaka0b5e61d2017-10-04 20:09:49 +03002672 "re.Match",
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002673 sizeof(MatchObject), sizeof(Py_ssize_t),
2674 (destructor)match_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002675 0, /* tp_vectorcall_offset */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002676 0, /* tp_getattr */
2677 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002678 0, /* tp_as_async */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002679 (reprfunc)match_repr, /* tp_repr */
2680 0, /* tp_as_number */
2681 0, /* tp_as_sequence */
Eric V. Smith605bdae2016-09-11 08:55:43 -04002682 &match_as_mapping, /* tp_as_mapping */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002683 0, /* tp_hash */
2684 0, /* tp_call */
2685 0, /* tp_str */
2686 0, /* tp_getattro */
2687 0, /* tp_setattro */
2688 0, /* tp_as_buffer */
2689 Py_TPFLAGS_DEFAULT, /* tp_flags */
2690 match_doc, /* tp_doc */
2691 0, /* tp_traverse */
2692 0, /* tp_clear */
2693 0, /* tp_richcompare */
2694 0, /* tp_weaklistoffset */
2695 0, /* tp_iter */
2696 0, /* tp_iternext */
2697 match_methods, /* tp_methods */
2698 match_members, /* tp_members */
2699 match_getset, /* tp_getset */
2700};
2701
/* Method table of the scanner type (installed as Scanner_Type.tp_methods).
   The *_METHODDEF macros are written without separating commas, so each
   one has to expand to a complete entry including its trailing comma. */
static PyMethodDef scanner_methods[] = {
    _SRE_SRE_SCANNER_MATCH_METHODDEF
    _SRE_SRE_SCANNER_SEARCH_METHODDEF
    {NULL, NULL}  /* sentinel */
};
2707
/* SCAN_OFF(x): byte offset of field x inside ScannerObject. */
#define SCAN_OFF(x) offsetof(ScannerObject, x)
/* Attributes of the scanner type read straight from ScannerObject fields
   (installed as Scanner_Type.tp_members): the read-only "pattern"
   attribute, which has no docstring. */
static PyMemberDef scanner_members[] = {
    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
    {NULL}  /* Sentinel */
};
2713
2714static PyTypeObject Scanner_Type = {
2715 PyVarObject_HEAD_INIT(NULL, 0)
2716 "_" SRE_MODULE ".SRE_Scanner",
2717 sizeof(ScannerObject), 0,
2718 (destructor)scanner_dealloc,/* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002719 0, /* tp_vectorcall_offset */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002720 0, /* tp_getattr */
2721 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002722 0, /* tp_as_async */
Larry Hastings2d0a69a2015-05-03 14:49:19 -07002723 0, /* tp_repr */
2724 0, /* tp_as_number */
2725 0, /* tp_as_sequence */
2726 0, /* tp_as_mapping */
2727 0, /* tp_hash */
2728 0, /* tp_call */
2729 0, /* tp_str */
2730 0, /* tp_getattro */
2731 0, /* tp_setattro */
2732 0, /* tp_as_buffer */
2733 Py_TPFLAGS_DEFAULT, /* tp_flags */
2734 0, /* tp_doc */
2735 0, /* tp_traverse */
2736 0, /* tp_clear */
2737 0, /* tp_richcompare */
2738 0, /* tp_weaklistoffset */
2739 0, /* tp_iter */
2740 0, /* tp_iternext */
2741 scanner_methods, /* tp_methods */
2742 scanner_members, /* tp_members */
2743 0, /* tp_getset */
2744};
2745
/* Module-level function table; sremodule refers to it as its method
   table.  The *_METHODDEF macros are written without separating commas,
   so each one has to expand to a complete entry including its trailing
   comma. */
static PyMethodDef _functions[] = {
    _SRE_COMPILE_METHODDEF
    _SRE_GETCODESIZE_METHODDEF
    _SRE_ASCII_ISCASED_METHODDEF
    _SRE_UNICODE_ISCASED_METHODDEF
    _SRE_ASCII_TOLOWER_METHODDEF
    _SRE_UNICODE_TOLOWER_METHODDEF
    {NULL, NULL}  /* sentinel */
};
2755
Martin v. Löwis1a214512008-06-11 05:26:20 +00002756static struct PyModuleDef sremodule = {
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002757 PyModuleDef_HEAD_INIT,
2758 "_" SRE_MODULE,
2759 NULL,
2760 -1,
2761 _functions,
2762 NULL,
2763 NULL,
2764 NULL,
2765 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002766};
2767
2768PyMODINIT_FUNC PyInit__sre(void)
Guido van Rossumb700df92000-03-31 14:59:30 +00002769{
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002770 PyObject* m;
2771 PyObject* d;
Barry Warsaw214a0b132001-08-16 20:33:48 +00002772 PyObject* x;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002773
Benjamin Peterson08bf91c2010-04-11 16:12:57 +00002774 /* Patch object types */
2775 if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2776 PyType_Ready(&Scanner_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00002777 return NULL;
Guido van Rossumb700df92000-03-31 14:59:30 +00002778
Martin v. Löwis1a214512008-06-11 05:26:20 +00002779 m = PyModule_Create(&sremodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002780 if (m == NULL)
Ezio Melotti2aa2b3b2011-09-29 00:58:57 +03002781 return NULL;
Fredrik Lundhb35ffc02001-01-15 12:46:09 +00002782 d = PyModule_GetDict(m);
2783
Christian Heimes217cfd12007-12-02 14:31:20 +00002784 x = PyLong_FromLong(SRE_MAGIC);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002785 if (x) {
2786 PyDict_SetItemString(d, "MAGIC", x);
2787 Py_DECREF(x);
2788 }
Fredrik Lundh9c7eab82001-04-15 19:00:58 +00002789
Christian Heimes217cfd12007-12-02 14:31:20 +00002790 x = PyLong_FromLong(sizeof(SRE_CODE));
Martin v. Löwis78e2f062003-04-19 12:56:08 +00002791 if (x) {
2792 PyDict_SetItemString(d, "CODESIZE", x);
2793 Py_DECREF(x);
2794 }
2795
Serhiy Storchaka70ca0212013-02-16 16:47:47 +02002796 x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2797 if (x) {
2798 PyDict_SetItemString(d, "MAXREPEAT", x);
2799 Py_DECREF(x);
2800 }
2801
Serhiy Storchaka9baa5b22014-09-29 22:49:23 +03002802 x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2803 if (x) {
2804 PyDict_SetItemString(d, "MAXGROUPS", x);
2805 Py_DECREF(x);
2806 }
2807
Neal Norwitzfe537132007-08-26 03:55:15 +00002808 x = PyUnicode_FromString(copyright);
Fredrik Lundh21009b92001-09-18 18:47:09 +00002809 if (x) {
2810 PyDict_SetItemString(d, "copyright", x);
2811 Py_DECREF(x);
2812 }
Martin v. Löwis1a214512008-06-11 05:26:20 +00002813 return m;
Guido van Rossumb700df92000-03-31 14:59:30 +00002814}
2815
Gustavo Niemeyerbe733ee2003-04-20 07:35:44 +00002816/* vim:ts=4:sw=4:et
2817*/